From 261ffd53cc8e91e6484a3170a1ddf59a16696667 Mon Sep 17 00:00:00 2001 From: Nuno Das Neves Date: Tue, 1 Apr 2025 10:32:17 -0700 Subject: [PATCH 001/559] Drivers: hv: Fix bad pointer dereference in hv_get_partition_id 'output' is already a pointer to the output argument, it should be passed directly to hv_do_hypercall() without the '&' operator. Fixes: e96204e5e96e ("hyperv: Move hv_current_partition_id to arch-generic code") Signed-off-by: Nuno Das Neves Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1743528737-20310-1-git-send-email-nunodasneves@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <1743528737-20310-1-git-send-email-nunodasneves@linux.microsoft.com> --- drivers/hv/hv_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index b3b11be11650..a7d7494feaca 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -307,7 +307,7 @@ void __init hv_get_partition_id(void) local_irq_save(flags); output = *this_cpu_ptr(hyperv_pcpu_input_arg); - status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, &output); + status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output); pt_id = output->partition_id; local_irq_restore(flags); From 549d8994447f2f628c6cedd139d53926bdfee881 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Mar 2025 22:38:55 +0100 Subject: [PATCH 002/559] media: vivid: fix FB dependency It's not enough to have a dependency on CONFIG_FB, as that can be in a loadable module when vivid itself is builtin: drivers/media/test-drivers/vivid/vivid-osd.o: in function `vivid_fb_init': vivid-osd.c:(.text+0xdc0): undefined reference to `fb_alloc_cmap' vivid-osd.c:(.text+0xe26): undefined reference to `register_framebuffer' Change the dependency to only allow configurations that can be built, but change the FB to FB_CORE so this is also possible when using DRM with FB compatibility rather than full fbdev. Fixes: 20889ddede38 ("media: vivid: Introduce VIDEO_VIVID_OSD") Signed-off-by: Arnd Bergmann Reviewed-by: Ricardo Ribalda Signed-off-by: Hans Verkuil --- drivers/media/test-drivers/vivid/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/test-drivers/vivid/Kconfig b/drivers/media/test-drivers/vivid/Kconfig index e95edc0f22bf..cc470070a7a5 100644 --- a/drivers/media/test-drivers/vivid/Kconfig +++ b/drivers/media/test-drivers/vivid/Kconfig @@ -32,7 +32,8 @@ config VIDEO_VIVID_CEC config VIDEO_VIVID_OSD bool "Enable Framebuffer for testing Output Overlay" - depends on VIDEO_VIVID && FB + depends on VIDEO_VIVID && FB_CORE + depends on VIDEO_VIVID=m || FB_CORE=y default y select FB_IOMEM_HELPERS help From 9df181c8de1b6b285556f80bfd02584f3457f32e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Mar 2025 16:46:30 +0100 Subject: [PATCH 003/559] media: i2c: lt6911uxe: Fix Kconfig dependencies: The new driver fails to build if I2C is disabled: drivers/media/i2c/lt6911uxe.c:703:1: error: data definition has no type or storage class [-Werror] 703 | module_i2c_driver(lt6911uxe_i2c_driver); or if I2C is on but V4L2_CCI_I2C is not: ERROR: modpost: "cci_write" [drivers/media/i2c/lt6911uxe.ko] undefined! ERROR: modpost: "cci_read" [drivers/media/i2c/lt6911uxe.ko] undefined! For both by adding a dependency on I2C and selecting V4L2_CCI_I2C, which follows the common practice for these. Fixes: e49563c3be09 ("media: i2c: add lt6911uxe hdmi bridge driver") Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil --- drivers/media/i2c/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index e576b213084d..b06365d02ef1 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -1149,8 +1149,9 @@ config VIDEO_ISL7998X config VIDEO_LT6911UXE tristate "Lontium LT6911UXE decoder" - depends on ACPI && VIDEO_DEV + depends on ACPI && VIDEO_DEV && I2C select V4L2_FWNODE + select V4L2_CCI_I2C help This is a Video4Linux2 sensor-level driver for the Lontium LT6911UXE HDMI to MIPI CSI-2 bridge. From 0dce5b44bd38af20b0383ae4cabeead37b4b9a9a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 18 Mar 2025 15:03:27 +0100 Subject: [PATCH 004/559] media: platform: synopsys: VIDEO_SYNOPSYS_HDMIRX should depend on ARCH_ROCKCHIP For now, the Synopsys HDMI HDMI RX Controller is only supported on Rockchip RK3588 SoCs. Hence add a dependency on ARCH_ROCKCHIP, to prevent asking the user about this driver when configuring a kernel without Rockchip SoC support. Fixes: 7b59b132ad4398f9 ("media: platform: synopsys: Add support for HDMI input driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Shreeya Patel Signed-off-by: Hans Verkuil --- drivers/media/platform/synopsys/hdmirx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/synopsys/hdmirx/Kconfig b/drivers/media/platform/synopsys/hdmirx/Kconfig index 27e6706f84a3..4321f985f632 100644 --- a/drivers/media/platform/synopsys/hdmirx/Kconfig +++ b/drivers/media/platform/synopsys/hdmirx/Kconfig @@ -2,6 +2,7 @@ config VIDEO_SYNOPSYS_HDMIRX tristate "Synopsys DesignWare HDMI Receiver driver" + depends on ARCH_ROCKCHIP || COMPILE_TEST depends on VIDEO_DEV select MEDIA_CONTROLLER select VIDEO_V4L2_SUBDEV_API From 118b34092e37da1d3c4808e8cd1dd0246ac3f97e Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 19 Mar 2025 11:32:56 +0100 Subject: [PATCH 005/559] media: i2c: lt6911uxe: add two selects to Kconfig In order to get the v4l2_subdev functions you need to select MEDIA_CONTROLLER and VIDEO_V4L2_SUBDEV_API. Signed-off-by: Hans Verkuil Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503151002.HacBN2LO-lkp@intel.com/ Fixes: e49563c3be09 ("media: i2c: add lt6911uxe hdmi bridge driver") --- drivers/media/i2c/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index b06365d02ef1..e45ba127069f 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -1152,6 +1152,8 @@ config VIDEO_LT6911UXE depends on ACPI && VIDEO_DEV && I2C select V4L2_FWNODE select V4L2_CCI_I2C + select MEDIA_CONTROLLER + select VIDEO_V4L2_SUBDEV_API help This is a Video4Linux2 sensor-level driver for the Lontium LT6911UXE HDMI to MIPI CSI-2 bridge. From d51adf038ebe59b592005166209b70218b1da849 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 27 Feb 2025 15:02:46 +0100 Subject: [PATCH 006/559] media: cec: tda9950: add back i2c dependency drivers/media/cec/i2c/tda9950.c: In function 'tda9950_write_range': drivers/media/cec/i2c/tda9950.c:92:15: error: implicit declaration of function 'i2c_transfer' [-Wimplicit-function-declaration] 92 | ret = i2c_transfer(client->adapter, &msg, 1); | ^~~~~~~~~~~~ drivers/media/cec/i2c/tda9950.c: In function 'tda9950_probe': drivers/media/cec/i2c/tda9950.c:391:14: error: implicit declaration of function 'i2c_check_functionality' [-Wimplicit-function-declaration] 391 | if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { | ^~~~~~~~~~~~~~~~~~~~~~~ Fixes: caa6f4a75e9f ("media: cec: move driver for TDA9950 from drm/i2c") Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil --- drivers/media/cec/i2c/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/cec/i2c/Kconfig b/drivers/media/cec/i2c/Kconfig index b9d21643eef1..c31abc26f602 100644 --- a/drivers/media/cec/i2c/Kconfig +++ b/drivers/media/cec/i2c/Kconfig @@ -16,6 +16,7 @@ config CEC_CH7322 config CEC_NXP_TDA9950 tristate "NXP Semiconductors TDA9950/TDA998X HDMI CEC" + depends on I2C select CEC_NOTIFIER select CEC_CORE default DRM_I2C_NXP_TDA998X From 06eaa824fd239edd1eab2754f29b2d03da313003 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 18 Mar 2025 07:19:46 +0000 Subject: [PATCH 007/559] mm/memblock: pass size instead of end to memblock_set_node() The second parameter of memblock_set_node() is size instead of end. Since it iterates from lower address to higher address, finally the node id is correct. But during the process, some of them are wrong. Pass size instead of end. Fixes: 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") Signed-off-by: Wei Yang CC: Mike Rapoport CC: Yajun Deng CC: stable@vger.kernel.org Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20250318071948.23854-2-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index 0a53db4d9f7b..9639f04b4fdf 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2196,7 +2196,7 @@ static void __init memmap_init_reserved_pages(void) if (memblock_is_nomap(region)) reserve_bootmem_region(start, end, nid); - memblock_set_node(start, end, &memblock.reserved, nid); + memblock_set_node(start, region->size, &memblock.reserved, nid); } /* From eac8ea8736ccc09513152d970eb2a42ed78e87e8 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 18 Mar 2025 07:19:47 +0000 Subject: [PATCH 008/559] mm/memblock: repeat setting reserved region nid if array is doubled Commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") introduce a way to set nid to all reserved region. But there is a corner case it will leave some region with invalid nid. When memblock_set_node() doubles the array of memblock.reserved, it may lead to a new reserved region before current position. The new region will be left with an invalid node id. Repeat the process when detecting it. Fixes: 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") Signed-off-by: Wei Yang CC: Mike Rapoport CC: Yajun Deng CC: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250318071948.23854-3-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- mm/memblock.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mm/memblock.c b/mm/memblock.c index 9639f04b4fdf..d3509414b8c3 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2183,11 +2183,14 @@ static void __init memmap_init_reserved_pages(void) struct memblock_region *region; phys_addr_t start, end; int nid; + unsigned long max_reserved; /* * set nid on all reserved pages and also treat struct * pages for the NOMAP regions as PageReserved */ +repeat: + max_reserved = memblock.reserved.max; for_each_mem_region(region) { nid = memblock_get_region_node(region); start = region->base; @@ -2198,6 +2201,13 @@ static void __init memmap_init_reserved_pages(void) memblock_set_node(start, region->size, &memblock.reserved, nid); } + /* + * 'max' is changed means memblock.reserved has been doubled its + * array, which may result a new reserved region before current + * 'start'. Now we should repeat the procedure to set its node id. + */ + if (max_reserved != memblock.reserved.max) + goto repeat; /* * initialize struct pages for reserved regions that don't have From 3b394dff15e14550a26b133fc7b556b5b526f6a5 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 18 Mar 2025 07:19:48 +0000 Subject: [PATCH 009/559] memblock tests: add test for memblock_set_node Add a test to check memblock_set_node() behavior. And create a corner case in which the memblock.reserved array is doubled during memblock_set_node(). And finally make sure all regions in memblock.reserved are with valid node id. Signed-off-by: Wei Yang CC: Mike Rapoport CC: Yajun Deng Link: https://lore.kernel.org/r/20250318071948.23854-4-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- tools/testing/memblock/tests/basic_api.c | 102 +++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index 67503089e6a0..01e836fba488 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -2434,6 +2434,107 @@ static int memblock_overlaps_region_checks(void) return 0; } +#ifdef CONFIG_NUMA +static int memblock_set_node_check(void) +{ + unsigned long i, max_reserved; + struct memblock_region *rgn; + void *orig_region; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_allow_resize(); + + dummy_physical_memory_init(); + memblock_add(dummy_physical_memory_base(), MEM_SIZE); + orig_region = memblock.reserved.regions; + + /* Equally Split range to node 0 and 1*/ + memblock_set_node(memblock_start_of_DRAM(), + memblock_phys_mem_size() / 2, &memblock.memory, 0); + memblock_set_node(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2, + memblock_phys_mem_size() / 2, &memblock.memory, 1); + + ASSERT_EQ(memblock.memory.cnt, 2); + rgn = &memblock.memory.regions[0]; + ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); + ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2); + ASSERT_EQ(memblock_get_region_node(rgn), 0); + rgn = &memblock.memory.regions[1]; + ASSERT_EQ(rgn->base, memblock_start_of_DRAM() + memblock_phys_mem_size() / 2); + ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2); + ASSERT_EQ(memblock_get_region_node(rgn), 1); + + /* Reserve 126 regions with the last one across node boundary */ + for (i = 0; i < 125; i++) + memblock_reserve(memblock_start_of_DRAM() + SZ_16 * i, SZ_8); + + memblock_reserve(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2 - SZ_8, + SZ_16); + + /* + * Commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") + * do following process to set nid to each memblock.reserved region. + * But it may miss some region if memblock_set_node() double the + * array. + * + * By checking 'max', we make sure all region nid is set properly. + */ +repeat: + max_reserved = memblock.reserved.max; + for_each_mem_region(rgn) { + int nid = memblock_get_region_node(rgn); + + memblock_set_node(rgn->base, rgn->size, &memblock.reserved, nid); + } + if (max_reserved != memblock.reserved.max) + goto repeat; + + /* Confirm each region has valid node set */ + for_each_reserved_mem_region(rgn) { + ASSERT_TRUE(numa_valid_node(memblock_get_region_node(rgn))); + if (rgn == (memblock.reserved.regions + memblock.reserved.cnt - 1)) + ASSERT_EQ(1, memblock_get_region_node(rgn)); + else + ASSERT_EQ(0, memblock_get_region_node(rgn)); + } + + dummy_physical_memory_cleanup(); + + /* + * The current reserved.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. + */ + memblock.reserved.regions = orig_region; + memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS; + + test_pass_pop(); + + return 0; +} + +static int memblock_set_node_checks(void) +{ + prefix_reset(); + prefix_push("memblock_set_node"); + test_print("Running memblock_set_node tests...\n"); + + memblock_set_node_check(); + + prefix_pop(); + + return 0; +} +#else +static int memblock_set_node_checks(void) +{ + return 0; +} +#endif + int memblock_basic_checks(void) { memblock_initialization_check(); @@ -2444,6 +2545,7 @@ int memblock_basic_checks(void) memblock_bottom_up_checks(); memblock_trim_memory_checks(); memblock_overlaps_region_checks(); + memblock_set_node_checks(); return 0; } From 649b50a82f09fa44c2f7a65618e4584072145ab7 Mon Sep 17 00:00:00 2001 From: Ruslan Piasetskyi Date: Wed, 26 Mar 2025 23:06:38 +0100 Subject: [PATCH 010/559] mmc: renesas_sdhi: Fix error handling in renesas_sdhi_probe After moving tmio_mmc_host_probe down, error handling has to be adjusted. Fixes: 74f45de394d9 ("mmc: renesas_sdhi: register irqs before registering controller") Reviewed-by: Ihar Salauyou Signed-off-by: Ruslan Piasetskyi Reviewed-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250326220638.460083-1-ruslan.piasetskyi@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index fa6526be3638..cea6af5daf99 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -1243,26 +1243,26 @@ int renesas_sdhi_probe(struct platform_device *pdev, num_irqs = platform_irq_count(pdev); if (num_irqs < 0) { ret = num_irqs; - goto eirq; + goto edisclk; } /* There must be at least one IRQ source */ if (!num_irqs) { ret = -ENXIO; - goto eirq; + goto edisclk; } for (i = 0; i < num_irqs; i++) { irq = platform_get_irq(pdev, i); if (irq < 0) { ret = irq; - goto eirq; + goto edisclk; } ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0, dev_name(&pdev->dev), host); if (ret) - goto eirq; + goto edisclk; } ret = tmio_mmc_host_probe(host); @@ -1274,8 +1274,6 @@ int renesas_sdhi_probe(struct platform_device *pdev, return ret; -eirq: - tmio_mmc_host_remove(host); edisclk: renesas_sdhi_clk_disable(host); efree: From 9078f01fec1275a1974a01a64a5a495d72898c60 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 29 Mar 2025 17:41:26 +0100 Subject: [PATCH 011/559] mmc: renesas_sdhi: add regulator dependency The driver started using the regulator subsystem and fails to build without a dependeny on CONFIG_REGULATOR: ERROR: modpost: "rdev_get_drvdata" [drivers/mmc/host/renesas_sdhi_core.ko] undefined! ERROR: modpost: "devm_regulator_register" [drivers/mmc/host/renesas_sdhi_core.ko] undefined! The 'select RESET_CONTROLLER' needs to either go away or get changed to a dependency in order to avoid Kconfig dependency loops here. It also turns out the the superh version needs neither RESET_CONTROLLER nor REGULATOR, and this works because CONFIG_OF is not set there. Change both to a 'depends on', but add '|| !OF' for the superh case. Fixes: fae80a99dc03 ("mmc: renesas_sdhi: Add support for RZ/G3E SoC") Tested-by: Biju Das Signed-off-by: Arnd Bergmann Reviewed-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250329164145.3194284-1-arnd@kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 6824131b69b1..264e11fa58ea 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -691,8 +691,8 @@ config MMC_TMIO_CORE config MMC_SDHI tristate "Renesas SDHI SD/SDIO controller support" depends on SUPERH || ARCH_RENESAS || COMPILE_TEST + depends on (RESET_CONTROLLER && REGULATOR) || !OF select MMC_TMIO_CORE - select RESET_CONTROLLER if ARCH_RENESAS help This provides support for the SDHI SD/SDIO controller found in Renesas SuperH, ARM and ARM64 based SoCs From 77183db6b8dbd8c352816030b328dd55993dc330 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 31 Mar 2025 00:17:32 +0200 Subject: [PATCH 012/559] mmc: renesas_sdhi: disable clocks if registering regulator failed Because the clocks were just enabled, bail out to the proper target if there are problems with the regulator. Fixes: fae80a99dc03 ("mmc: renesas_sdhi: Add support for RZ/G3E SoC") Signed-off-by: Wolfram Sang Reviewed-by: Biju Das Tested-by: Biju Das Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250330221732.56072-2-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index cea6af5daf99..8c83e203c516 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -1179,7 +1179,7 @@ int renesas_sdhi_probe(struct platform_device *pdev, if (IS_ERR(rdev)) { dev_err(dev, "regulator register failed err=%ld", PTR_ERR(rdev)); ret = PTR_ERR(rdev); - goto efree; + goto edisclk; } priv->rdev = rdev; } From 6d03811d7a99e08d5928f58120acb45b8ba22b08 Mon Sep 17 00:00:00 2001 From: Gustavo Silva Date: Tue, 4 Mar 2025 15:01:02 -0300 Subject: [PATCH 013/559] iio: imu: bmi270: fix initial sampling frequency configuration In the bmi270_configure_imu() function, the accelerometer and gyroscope configuration registers are incorrectly written with the mask BMI270_PWR_CONF_ADV_PWR_SAVE_MSK, which is unrelated to these registers. As a result, the accelerometer's sampling frequency is set to 200 Hz instead of the intended 100 Hz. Remove the mask to ensure the correct bits are set in the configuration registers. Fixes: 3ea51548d6b2 ("iio: imu: Add i2c driver for bmi270 imu") Signed-off-by: Gustavo Silva Reviewed-by: Alex Lanzano Link: https://patch.msgid.link/20250304-bmi270-odr-fix-v1-1-384dbcd699fb@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bmi270/bmi270_core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iio/imu/bmi270/bmi270_core.c b/drivers/iio/imu/bmi270/bmi270_core.c index a86be5af5ccb..2e4469f30d53 100644 --- a/drivers/iio/imu/bmi270/bmi270_core.c +++ b/drivers/iio/imu/bmi270/bmi270_core.c @@ -918,8 +918,7 @@ static int bmi270_configure_imu(struct bmi270_data *data) FIELD_PREP(BMI270_ACC_CONF_ODR_MSK, BMI270_ACC_CONF_ODR_100HZ) | FIELD_PREP(BMI270_ACC_CONF_BWP_MSK, - BMI270_ACC_CONF_BWP_NORMAL_MODE) | - BMI270_PWR_CONF_ADV_PWR_SAVE_MSK); + BMI270_ACC_CONF_BWP_NORMAL_MODE)); if (ret) return dev_err_probe(dev, ret, "Failed to configure accelerometer"); @@ -927,8 +926,7 @@ static int bmi270_configure_imu(struct bmi270_data *data) FIELD_PREP(BMI270_GYR_CONF_ODR_MSK, BMI270_GYR_CONF_ODR_200HZ) | FIELD_PREP(BMI270_GYR_CONF_BWP_MSK, - BMI270_GYR_CONF_BWP_NORMAL_MODE) | - BMI270_PWR_CONF_ADV_PWR_SAVE_MSK); + BMI270_GYR_CONF_BWP_NORMAL_MODE)); if (ret) return dev_err_probe(dev, ret, "Failed to configure gyroscope"); From 38f67d0264929762e54ae5948703a21f841fe706 Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Sun, 9 Mar 2025 19:35:15 +0000 Subject: [PATCH 014/559] iio: accel: adxl367: fix setting odr for activity time update Fix setting the odr value to update activity time based on frequency derrived by recent odr, and not by obsolete odr value. The [small] bug: When _adxl367_set_odr() is called with a new odr value, it first writes the new odr value to the hardware register ADXL367_REG_FILTER_CTL. Second, it calls _adxl367_set_act_time_ms(), which calls adxl367_time_ms_to_samples(). Here st->odr still holds the old odr value. This st->odr member is used to derrive a frequency value, which is applied to update ADXL367_REG_TIME_ACT. Hence, the idea is to update activity time, based on possibilities and power consumption by the current ODR rate. Finally, when the function calls return, again in _adxl367_set_odr() the new ODR is assigned to st->odr. The fix: When setting a new ODR value is set to ADXL367_REG_FILTER_CTL, also ADXL367_REG_TIME_ACT should probably be updated with a frequency based on the recent ODR value and not the old one. Changing the location of the assignment to st->odr fixes this. Fixes: cbab791c5e2a5 ("iio: accel: add ADXL367 driver") Signed-off-by: Lothar Rubusch Reviewed-by: Marcelo Schmitt Link: https://patch.msgid.link/20250309193515.2974-1-l.rubusch@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl367.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index add4053e7a02..0c04b2bb7efb 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -601,18 +601,14 @@ static int _adxl367_set_odr(struct adxl367_state *st, enum adxl367_odr odr) if (ret) return ret; + st->odr = odr; + /* Activity timers depend on ODR */ ret = _adxl367_set_act_time_ms(st, st->act_time_ms); if (ret) return ret; - ret = _adxl367_set_inact_time_ms(st, st->inact_time_ms); - if (ret) - return ret; - - st->odr = odr; - - return 0; + return _adxl367_set_inact_time_ms(st, st->inact_time_ms); } static int adxl367_set_odr(struct iio_dev *indio_dev, enum adxl367_odr odr) From 159ca7f18129834b6f4c7eae67de48e96c752fc9 Mon Sep 17 00:00:00 2001 From: Silvano Seva Date: Tue, 11 Mar 2025 09:49:47 +0100 Subject: [PATCH 015/559] iio: imu: st_lsm6dsx: fix possible lockup in st_lsm6dsx_read_fifo Prevent st_lsm6dsx_read_fifo from falling in an infinite loop in case pattern_len is equal to zero and the device FIFO is not empty. Fixes: 290a6ce11d93 ("iio: imu: add support to lsm6dsx driver") Signed-off-by: Silvano Seva Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250311085030.3593-2-s.seva@4sigma.it Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c index 0a7cd8c1aa33..480a9b31065c 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c @@ -392,6 +392,9 @@ int st_lsm6dsx_read_fifo(struct st_lsm6dsx_hw *hw) if (fifo_status & cpu_to_le16(ST_LSM6DSX_FIFO_EMPTY_MASK)) return 0; + if (!pattern_len) + pattern_len = ST_LSM6DSX_SAMPLE_SIZE; + fifo_len = (le16_to_cpu(fifo_status) & fifo_diff_mask) * ST_LSM6DSX_CHAN_SIZE; fifo_len = (fifo_len / pattern_len) * pattern_len; From 8114ef86e2058e2554111b793596f17bee23fa15 Mon Sep 17 00:00:00 2001 From: Silvano Seva Date: Tue, 11 Mar 2025 09:49:49 +0100 Subject: [PATCH 016/559] iio: imu: st_lsm6dsx: fix possible lockup in st_lsm6dsx_read_tagged_fifo Prevent st_lsm6dsx_read_tagged_fifo from falling in an infinite loop in case pattern_len is equal to zero and the device FIFO is not empty. Fixes: 801a6e0af0c6 ("iio: imu: st_lsm6dsx: add support to LSM6DSO") Signed-off-by: Silvano Seva Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250311085030.3593-4-s.seva@4sigma.it Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c index 480a9b31065c..8a9d2593576a 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c @@ -626,6 +626,9 @@ int st_lsm6dsx_read_tagged_fifo(struct st_lsm6dsx_hw *hw) if (!fifo_len) return 0; + if (!pattern_len) + pattern_len = ST_LSM6DSX_TAGGED_SAMPLE_SIZE; + for (read_len = 0; read_len < fifo_len; read_len += pattern_len) { err = st_lsm6dsx_read_block(hw, ST_LSM6DSX_REG_FIFO_OUT_TAG_ADDR, From 839f81de397019f55161c5982d670ac19d836173 Mon Sep 17 00:00:00 2001 From: Simon Xue Date: Wed, 12 Mar 2025 14:20:16 +0800 Subject: [PATCH 017/559] iio: adc: rockchip: Fix clock initialization sequence clock_set_rate should be executed after devm_clk_get_enabled. Fixes: 97ad10bb2901 ("iio: adc: rockchip_saradc: Make use of devm_clk_get_enabled") Signed-off-by: Simon Xue Reviewed-by: Heiko Stuebner Link: https://patch.msgid.link/20250312062016.137821-1-xxm@rock-chips.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/rockchip_saradc.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c index 9a099df79518..5e28bd28b81a 100644 --- a/drivers/iio/adc/rockchip_saradc.c +++ b/drivers/iio/adc/rockchip_saradc.c @@ -520,15 +520,6 @@ static int rockchip_saradc_probe(struct platform_device *pdev) if (info->reset) rockchip_saradc_reset_controller(info->reset); - /* - * Use a default value for the converter clock. - * This may become user-configurable in the future. - */ - ret = clk_set_rate(info->clk, info->data->clk_rate); - if (ret < 0) - return dev_err_probe(&pdev->dev, ret, - "failed to set adc clk rate\n"); - ret = regulator_enable(info->vref); if (ret < 0) return dev_err_probe(&pdev->dev, ret, @@ -555,6 +546,14 @@ static int rockchip_saradc_probe(struct platform_device *pdev) if (IS_ERR(info->clk)) return dev_err_probe(&pdev->dev, PTR_ERR(info->clk), "failed to get adc clock\n"); + /* + * Use a default value for the converter clock. + * This may become user-configurable in the future. + */ + ret = clk_set_rate(info->clk, info->data->clk_rate); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "failed to set adc clk rate\n"); platform_set_drvdata(pdev, indio_dev); From 82c51ac74071b80b3199d9e200ae1a5399f4deb0 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 20 Mar 2025 11:21:52 -0500 Subject: [PATCH 018/559] iio: adc: ad7380: disable offload before using SPI bus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move disabling of the SPI offload before attempting to use the SPI bus to write a register in ad7380_offload_buffer_predisable(). This caused a crash in the spi_engine_irq() interrupt handler due to being in an invalid state. Fixes: bbeaec81a03e ("iio: ad7380: add support for SPI offload") Signed-off-by: David Lechner Reviewed-by: Nuno Sá Reviewed-by: Angelo Dureghello Link: https://patch.msgid.link/20250320-iio-adc-ad7380-fix-spi-offload-buffer-predisable-v1-1-6912ac8c0ae0@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7380.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index 4fcb49fdf566..a2b41980c942 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -1211,6 +1211,9 @@ static int ad7380_offload_buffer_predisable(struct iio_dev *indio_dev) struct ad7380_state *st = iio_priv(indio_dev); int ret; + spi_offload_trigger_disable(st->offload, st->offload_trigger); + spi_unoptimize_message(&st->offload_msg); + if (st->seq) { ret = regmap_update_bits(st->regmap, AD7380_REG_ADDR_CONFIG1, @@ -1222,10 +1225,6 @@ static int ad7380_offload_buffer_predisable(struct iio_dev *indio_dev) st->seq = false; } - spi_offload_trigger_disable(st->offload, st->offload_trigger); - - spi_unoptimize_message(&st->offload_msg); - return 0; } From f063a28002e3350088b4577c5640882bf4ea17ea Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Fri, 21 Mar 2025 19:10:00 +0100 Subject: [PATCH 019/559] iio: light: opt3001: fix deadlock due to concurrent flag access The threaded IRQ function in this driver is reading the flag twice: once to lock a mutex and once to unlock it. Even though the code setting the flag is designed to prevent it, there are subtle cases where the flag could be true at the mutex_lock stage and false at the mutex_unlock stage. This results in the mutex not being unlocked, resulting in a deadlock. Fix it by making the opt3001_irq() code generally more robust, reading the flag into a variable and using the variable value at both stages. Fixes: 94a9b7b1809f ("iio: light: add support for TI's opt3001 light sensor") Cc: stable@vger.kernel.org Signed-off-by: Luca Ceresoli Link: https://patch.msgid.link/20250321-opt3001-irq-fix-v1-1-6c520d851562@bootlin.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/opt3001.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/light/opt3001.c b/drivers/iio/light/opt3001.c index 65b295877b41..393a3d2fbe1d 100644 --- a/drivers/iio/light/opt3001.c +++ b/drivers/iio/light/opt3001.c @@ -788,8 +788,9 @@ static irqreturn_t opt3001_irq(int irq, void *_iio) int ret; bool wake_result_ready_queue = false; enum iio_chan_type chan_type = opt->chip_info->chan_type; + bool ok_to_ignore_lock = opt->ok_to_ignore_lock; - if (!opt->ok_to_ignore_lock) + if (!ok_to_ignore_lock) mutex_lock(&opt->lock); ret = i2c_smbus_read_word_swapped(opt->client, OPT3001_CONFIGURATION); @@ -826,7 +827,7 @@ static irqreturn_t opt3001_irq(int irq, void *_iio) } out: - if (!opt->ok_to_ignore_lock) + if (!ok_to_ignore_lock) mutex_unlock(&opt->lock); if (wake_result_ready_queue) From 5257d80e22bf27009d6742e4c174f42cfe54e425 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 18 Mar 2025 17:52:09 -0500 Subject: [PATCH 020/559] iio: adc: ad7606: check for NULL before calling sw_mode_config() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check that the sw_mode_config function pointer is not NULL before calling it. Not all buses define this callback, which resulted in a NULL pointer dereference. Fixes: e571c1902116 ("iio: adc: ad7606: move scale_setup as function pointer on chip-info") Reviewed-by: Nuno Sá Signed-off-by: David Lechner Link: https://patch.msgid.link/20250318-iio-adc-ad7606-improvements-v2-1-4b605427774c@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index 1a314fddd7eb..703556eb7257 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -1236,9 +1236,11 @@ static int ad7616_sw_mode_setup(struct iio_dev *indio_dev) st->write_scale = ad7616_write_scale_sw; st->write_os = &ad7616_write_os_sw; - ret = st->bops->sw_mode_config(indio_dev); - if (ret) - return ret; + if (st->bops->sw_mode_config) { + ret = st->bops->sw_mode_config(indio_dev); + if (ret) + return ret; + } /* Activate Burst mode and SEQEN MODE */ return ad7606_write_mask(st, AD7616_CONFIGURATION_REGISTER, @@ -1268,6 +1270,9 @@ static int ad7606b_sw_mode_setup(struct iio_dev *indio_dev) st->write_scale = ad7606_write_scale_sw; st->write_os = &ad7606_write_os_sw; + if (!st->bops->sw_mode_config) + return 0; + return st->bops->sw_mode_config(indio_dev); } From 83ded7cfaccccd2f4041769c313b58b4c9e265ad Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Mon, 31 Mar 2025 13:50:20 +0800 Subject: [PATCH 021/559] iio: hid-sensor-prox: Restore lost scale assignments The variables `scale_pre_decml`, `scale_post_decml`, and `scale_precision` were assigned in commit d68c592e02f6 ("iio: hid-sensor-prox: Fix scale not correct issue"), but due to a merge conflict in commit 9c15db92a8e5 ("Merge tag 'iio-for-5.13a' of https://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-next"), these assignments were lost. Add back lost assignments and replace `st->prox_attr` with `st->prox_attr[0]` because commit 596ef5cf654b ("iio: hid-sensor-prox: Add support for more channels") changed `prox_attr` to an array. Cc: stable@vger.kernel.org # 5.13+ Fixes: 9c15db92a8e5 ("Merge tag 'iio-for-5.13a' of https://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-next") Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Link: https://patch.msgid.link/20250331055022.1149736-2-lixu.zhang@intel.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/hid-sensor-prox.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 76b76d12b388..1dc6fb7cf614 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -257,6 +257,11 @@ static int prox_parse_report(struct platform_device *pdev, st->num_channels = index; + st->scale_precision = hid_sensor_format_scale(hsdev->usage, + &st->prox_attr[0], + &st->scale_pre_decml, + &st->scale_post_decml); + return 0; } From 8b518cdb03f5f6e06d635cbfd9583d1fdbb39bfd Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Mon, 31 Mar 2025 13:50:21 +0800 Subject: [PATCH 022/559] iio: hid-sensor-prox: support multi-channel SCALE calculation With the introduction of multi-channel support in commit 596ef5cf654b ("iio: hid-sensor-prox: Add support for more channels"), each channel requires an independent SCALE calculation, but the existing code only calculates SCALE for a single channel. Addresses the problem by modifying the driver to perform independent SCALE calculations for each channel. Cc: stable@vger.kernel.org Fixes: 596ef5cf654b ("iio: hid-sensor-prox: Add support for more channels") Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Link: https://patch.msgid.link/20250331055022.1149736-3-lixu.zhang@intel.com Signed-off-by: Jonathan Cameron --- .../hid-sensors/hid-sensor-attributes.c | 4 ++++ drivers/iio/light/hid-sensor-prox.c | 24 ++++++++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c index ad1882f608c0..2055a03cbeb1 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c @@ -66,6 +66,10 @@ static struct { {HID_USAGE_SENSOR_HUMIDITY, 0, 1000, 0}, {HID_USAGE_SENSOR_HINGE, 0, 0, 17453293}, {HID_USAGE_SENSOR_HINGE, HID_USAGE_SENSOR_UNITS_DEGREES, 0, 17453293}, + + {HID_USAGE_SENSOR_HUMAN_PRESENCE, 0, 1, 0}, + {HID_USAGE_SENSOR_HUMAN_PROXIMITY, 0, 1, 0}, + {HID_USAGE_SENSOR_HUMAN_ATTENTION, 0, 1, 0}, }; static void simple_div(int dividend, int divisor, int *whole, diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 1dc6fb7cf614..941508e58286 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -34,9 +34,9 @@ struct prox_state { struct iio_chan_spec channels[MAX_CHANNELS]; u32 channel2usage[MAX_CHANNELS]; u32 human_presence[MAX_CHANNELS]; - int scale_pre_decml; - int scale_post_decml; - int scale_precision; + int scale_pre_decml[MAX_CHANNELS]; + int scale_post_decml[MAX_CHANNELS]; + int scale_precision[MAX_CHANNELS]; unsigned long scan_mask[2]; /* One entry plus one terminator. */ int num_channels; }; @@ -116,9 +116,12 @@ static int prox_read_raw(struct iio_dev *indio_dev, ret_type = IIO_VAL_INT; break; case IIO_CHAN_INFO_SCALE: - *val = prox_state->scale_pre_decml; - *val2 = prox_state->scale_post_decml; - ret_type = prox_state->scale_precision; + if (chan->scan_index >= prox_state->num_channels) + return -EINVAL; + + *val = prox_state->scale_pre_decml[chan->scan_index]; + *val2 = prox_state->scale_post_decml[chan->scan_index]; + ret_type = prox_state->scale_precision[chan->scan_index]; break; case IIO_CHAN_INFO_OFFSET: *val = hid_sensor_convert_exponent( @@ -249,6 +252,10 @@ static int prox_parse_report(struct platform_device *pdev, st->prox_attr[index].size); dev_dbg(&pdev->dev, "prox %x:%x\n", st->prox_attr[index].index, st->prox_attr[index].report_id); + st->scale_precision[index] = + hid_sensor_format_scale(usage_id, &st->prox_attr[index], + &st->scale_pre_decml[index], + &st->scale_post_decml[index]); index++; } @@ -257,11 +264,6 @@ static int prox_parse_report(struct platform_device *pdev, st->num_channels = index; - st->scale_precision = hid_sensor_format_scale(hsdev->usage, - &st->prox_attr[0], - &st->scale_pre_decml, - &st->scale_post_decml); - return 0; } From 79dabbd505210e41c88060806c92c052496dd61c Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Mon, 31 Mar 2025 13:50:22 +0800 Subject: [PATCH 023/559] iio: hid-sensor-prox: Fix incorrect OFFSET calculation The OFFSET calculation in the prox_read_raw() was incorrectly using the unit exponent, which is intended for SCALE calculations. Remove the incorrect OFFSET calculation and set it to a fixed value of 0. Cc: stable@vger.kernel.org Fixes: 39a3a0138f61 ("iio: hid-sensors: Added Proximity Sensor Driver") Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Link: https://patch.msgid.link/20250331055022.1149736-4-lixu.zhang@intel.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/hid-sensor-prox.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 941508e58286..4c65b32d34ce 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -124,8 +124,7 @@ static int prox_read_raw(struct iio_dev *indio_dev, ret_type = prox_state->scale_precision[chan->scan_index]; break; case IIO_CHAN_INFO_OFFSET: - *val = hid_sensor_convert_exponent( - prox_state->prox_attr[chan->scan_index].unit_expo); + *val = 0; ret_type = IIO_VAL_INT; break; case IIO_CHAN_INFO_SAMP_FREQ: From 2d7b60f33da324abe7824037b4829ff7df70e435 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Wed, 2 Apr 2025 18:55:58 -0500 Subject: [PATCH 024/559] iio: adc: ad7380: fix event threshold shift Add required bit shift to the event threshold read function to get correct scaling. When alert support was added, the write function correctly included the required shift needed to convert the threshold register value to the same scale as the raw ADC value. However, the shift got missed in the read function. Fixes: 27d1a4dbe1e1 ("iio: adc: ad7380: add alert support") Signed-off-by: David Lechner Reviewed-by: Julien Stephan Link: https://patch.msgid.link/20250402-iio-adc-ad7380-fix-event-threshold-shift-v1-1-ad4975c296b2@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7380.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index a2b41980c942..aef85093eb16 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -1610,11 +1610,25 @@ static int ad7380_write_event_config(struct iio_dev *indio_dev, return ret; } -static int ad7380_get_alert_th(struct ad7380_state *st, +static int ad7380_get_alert_th(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, enum iio_event_direction dir, int *val) { - int ret, tmp; + struct ad7380_state *st = iio_priv(indio_dev); + const struct iio_scan_type *scan_type; + int ret, tmp, shift; + + scan_type = iio_get_current_scan_type(indio_dev, chan); + if (IS_ERR(scan_type)) + return PTR_ERR(scan_type); + + /* + * The register value is 12-bits and is compared to the most significant + * bits of raw value, therefore a shift is required to convert this to + * the same scale as the raw value. + */ + shift = scan_type->realbits - 12; switch (dir) { case IIO_EV_DIR_RISING: @@ -1624,7 +1638,7 @@ static int ad7380_get_alert_th(struct ad7380_state *st, if (ret) return ret; - *val = FIELD_GET(AD7380_ALERT_HIGH_TH, tmp); + *val = FIELD_GET(AD7380_ALERT_HIGH_TH, tmp) << shift; return IIO_VAL_INT; case IIO_EV_DIR_FALLING: ret = regmap_read(st->regmap, @@ -1633,7 +1647,7 @@ static int ad7380_get_alert_th(struct ad7380_state *st, if (ret) return ret; - *val = FIELD_GET(AD7380_ALERT_LOW_TH, tmp); + *val = FIELD_GET(AD7380_ALERT_LOW_TH, tmp) << shift; return IIO_VAL_INT; default: return -EINVAL; @@ -1647,7 +1661,6 @@ static int ad7380_read_event_value(struct iio_dev *indio_dev, enum iio_event_info info, int *val, int *val2) { - struct ad7380_state *st = iio_priv(indio_dev); int ret; switch (info) { @@ -1655,7 +1668,7 @@ static int ad7380_read_event_value(struct iio_dev *indio_dev, if (!iio_device_claim_direct(indio_dev)) return -EBUSY; - ret = ad7380_get_alert_th(st, dir, val); + ret = ad7380_get_alert_th(indio_dev, chan, dir, val); iio_device_release_direct(indio_dev); return ret; From 9ca67840c0ddf3f39407339624cef824a4f27599 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 6 Mar 2025 18:54:47 +0000 Subject: [PATCH 025/559] firmware: arm_scmi: Balance device refcount when destroying devices Using device_find_child() to lookup the proper SCMI device to destroy causes an unbalance in device refcount, since device_find_child() calls an implicit get_device(): this, in turns, inhibits the call of the provided release methods upon devices destruction. As a consequence, one of the structures that is not freed properly upon destruction is the internal struct device_private dev->p populated by the drivers subsystem core. KMemleak detects this situation since loading/unloding some SCMI driver causes related devices to be created/destroyed without calling any device_release method. unreferenced object 0xffff00000f583800 (size 512): comm "insmod", pid 227, jiffies 4294912190 hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff 60 36 1d 8a 00 80 ff ff ........`6...... backtrace (crc 114e2eed): kmemleak_alloc+0xbc/0xd8 __kmalloc_cache_noprof+0x2dc/0x398 device_add+0x954/0x12d0 device_register+0x28/0x40 __scmi_device_create.part.0+0x1bc/0x380 scmi_device_create+0x2d0/0x390 scmi_create_protocol_devices+0x74/0xf8 scmi_device_request_notifier+0x1f8/0x2a8 notifier_call_chain+0x110/0x3b0 blocking_notifier_call_chain+0x70/0xb0 scmi_driver_register+0x350/0x7f0 0xffff80000a3b3038 do_one_initcall+0x12c/0x730 do_init_module+0x1dc/0x640 load_module+0x4b20/0x5b70 init_module_from_file+0xec/0x158 $ ./scripts/faddr2line ./vmlinux device_add+0x954/0x12d0 device_add+0x954/0x12d0: kmalloc_noprof at include/linux/slab.h:901 (inlined by) kzalloc_noprof at include/linux/slab.h:1037 (inlined by) device_private_init at drivers/base/core.c:3510 (inlined by) device_add at drivers/base/core.c:3561 Balance device refcount by issuing a put_device() on devices found via device_find_child(). Reported-by: Alice Ryhl Closes: https://lore.kernel.org/linux-arm-kernel/Z8nK3uFkspy61yjP@arm.com/T/#mc1f73a0ea5e41014fa145147b7b839fc988ada8f CC: Sudeep Holla CC: Catalin Marinas Fixes: d4f9dddd21f3 ("firmware: arm_scmi: Add dynamic scmi devices creation") Signed-off-by: Cristian Marussi Tested-by: Alice Ryhl Message-Id: <20250306185447.2039336-1-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index 7af01664ce7e..3a5474015f7d 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -255,6 +255,9 @@ static struct scmi_device *scmi_child_dev_find(struct device *parent, if (!dev) return NULL; + /* Drop the refcnt bumped implicitly by device_find_child */ + put_device(dev); + return to_scmi_dev(dev); } From c23c03bf1faa1e76be1eba35bad6da6a2a7c95ee Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 10 Mar 2025 17:58:00 +0000 Subject: [PATCH 026/559] firmware: arm_scmi: Fix timeout checks on polling path Polling mode transactions wait for a reply busy-looping without holding a spinlock, but currently the timeout checks are based only on elapsed time: as a result we could hit a false positive whenever our busy-looping thread is pre-empted and scheduled out for a time greater than the polling timeout. Change the checks at the end of the busy-loop to make sure that the polling wasn't indeed successful or an out-of-order reply caused the polling to be forcibly terminated. Fixes: 31d2f803c19c ("firmware: arm_scmi: Add sync_cmds_completed_on_ret transport flag") Reported-by: Huangjie Closes: https://lore.kernel.org/arm-scmi/20250123083323.2363749-1-jackhuang021@gmail.com/ Signed-off-by: Cristian Marussi Cc: stable@vger.kernel.org # 5.18.x Message-Id: <20250310175800.1444293-1-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 1c75a4c9c371..0390d5ff195e 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -1248,7 +1248,8 @@ static void xfer_put(const struct scmi_protocol_handle *ph, } static bool scmi_xfer_done_no_timeout(struct scmi_chan_info *cinfo, - struct scmi_xfer *xfer, ktime_t stop) + struct scmi_xfer *xfer, ktime_t stop, + bool *ooo) { struct scmi_info *info = handle_to_scmi_info(cinfo->handle); @@ -1257,7 +1258,7 @@ static bool scmi_xfer_done_no_timeout(struct scmi_chan_info *cinfo, * in case of out-of-order receptions of delayed responses */ return info->desc->ops->poll_done(cinfo, xfer) || - try_wait_for_completion(&xfer->done) || + (*ooo = try_wait_for_completion(&xfer->done)) || ktime_after(ktime_get(), stop); } @@ -1274,15 +1275,17 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc, * itself to support synchronous commands replies. */ if (!desc->sync_cmds_completed_on_ret) { + bool ooo = false; + /* * Poll on xfer using transport provided .poll_done(); * assumes no completion interrupt was available. */ ktime_t stop = ktime_add_ms(ktime_get(), timeout_ms); - spin_until_cond(scmi_xfer_done_no_timeout(cinfo, - xfer, stop)); - if (ktime_after(ktime_get(), stop)) { + spin_until_cond(scmi_xfer_done_no_timeout(cinfo, xfer, + stop, &ooo)); + if (!ooo && !info->desc->ops->poll_done(cinfo, xfer)) { dev_err(dev, "timed out in resp(caller: %pS) - polling\n", (void *)_RET_IP_); From 4567bdaaaaa1744da3d7da07d9aca2f941f5b4e5 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 21 Mar 2025 11:57:00 +0000 Subject: [PATCH 027/559] firmware: arm_ffa: Skip Rx buffer ownership release if not acquired MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completion of the FFA_PARTITION_INFO_GET ABI transfers the ownership of the caller’s Rx buffer from the producer(typically partition mnager) to the consumer(this driver/OS). FFA_RX_RELEASE transfers the ownership from the consumer back to the producer. However, when we set the flag to just return the count of partitions deployed in the system corresponding to the specified UUID while invoking FFA_PARTITION_INFO_GET, the Rx buffer ownership shouldn't be transferred to this driver. We must be able to skip transferring back the ownership to the partition manager when we request just to get the count of the partitions as the buffers are not acquired in this case. Firmware may return FFA_RET_DENIED or other error for the ffa_rx_release() in such cases. Fixes: bb1be7498500 ("firmware: arm_ffa: Add v1.1 get_partition_info support") Message-Id: <20250321115700.3525197-1-sudeep.holla@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 19295282de24..fe55613a8ea9 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -299,7 +299,8 @@ __ffa_partition_info_get(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, import_uuid(&buf->uuid, (u8 *)&rx_buf->uuid); } - ffa_rx_release(); + if (!(flags & PARTITION_INFO_GET_RETURN_COUNT_ONLY)) + ffa_rx_release(); mutex_unlock(&drv_info->rx_lock); From 7bd47be16108e55e6bc85bdd3cae5c9a2bc98a89 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Apr 2025 10:21:26 +0300 Subject: [PATCH 028/559] dm table: Fix W=1 build warning when mempool_needs_integrity is unused The mempool_needs_integrity is unused. This, in particular, prevents kernel builds with Clang, `make W=1` and CONFIG_WERROR=y: drivers/md/dm-table.c:1052:7: error: variable 'mempool_needs_integrity' set but not used [-Werror,-Wunused-but-set-variable] 1052 | bool mempool_needs_integrity = t->integrity_supported; | ^ Fix this by removing the leftover. Fixes: 105ca2a2c2ff ("block: split struct bio_integrity_payload") Signed-off-by: Andy Shevchenko Signed-off-by: Mikulas Patocka --- drivers/md/dm-table.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 35100a435c88..53759dbbe9d6 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1049,7 +1049,6 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * unsigned int min_pool_size = 0, pool_size; struct dm_md_mempools *pools; unsigned int bioset_flags = 0; - bool mempool_needs_integrity = t->integrity_supported; if (unlikely(type == DM_TYPE_NONE)) { DMERR("no table type is set, can't allocate mempools"); @@ -1074,8 +1073,6 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * per_io_data_size = max(per_io_data_size, ti->per_io_data_size); min_pool_size = max(min_pool_size, ti->num_flush_bios); - - mempool_needs_integrity |= ti->mempool_needs_integrity; } pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size); front_pad = roundup(per_io_data_size, From 0c562281199f225a849dbb5b9a40b079ee31dc0e Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 3 Apr 2025 21:59:24 -0500 Subject: [PATCH 029/559] arm64: dts: morello: Fix-up cache nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no need include the CPU number in the L2 cache node names as the names are local to the CPU nodes. The documented node name is also just "l2-cache". The L3 cache is not part of cpu@0/l2-cache as it is shared among all cores. Move it to /cpus node which is the typical place for shared caches. Signed-off-by: Rob Herring (Arm) Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20250403-dt-cpu-schema-v1-3-076be7171a85@kernel.org> Signed-off-by: Sudeep Holla --- arch/arm64/boot/dts/arm/morello.dtsi | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm64/boot/dts/arm/morello.dtsi b/arch/arm64/boot/dts/arm/morello.dtsi index 0bab0b3ea969..5bc1c725dc86 100644 --- a/arch/arm64/boot/dts/arm/morello.dtsi +++ b/arch/arm64/boot/dts/arm/morello.dtsi @@ -44,7 +44,7 @@ next-level-cache = <&l2_0>; clocks = <&scmi_dvfs 0>; - l2_0: l2-cache-0 { + l2_0: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -53,13 +53,6 @@ cache-sets = <2048>; cache-unified; next-level-cache = <&l3_0>; - - l3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; - cache-size = <0x100000>; - cache-unified; - }; }; }; @@ -78,7 +71,7 @@ next-level-cache = <&l2_1>; clocks = <&scmi_dvfs 0>; - l2_1: l2-cache-1 { + l2_1: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -105,7 +98,7 @@ next-level-cache = <&l2_2>; clocks = <&scmi_dvfs 1>; - l2_2: l2-cache-2 { + l2_2: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -132,7 +125,7 @@ next-level-cache = <&l2_3>; clocks = <&scmi_dvfs 1>; - l2_3: l2-cache-3 { + l2_3: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -143,6 +136,13 @@ next-level-cache = <&l3_0>; }; }; + + l3_0: l3-cache { + compatible = "cache"; + cache-level = <3>; + cache-size = <0x100000>; + cache-unified; + }; }; firmware { From 0cd34d98dfd4f2b596415b8f12faf7b946613458 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 6 Apr 2025 22:01:42 +0200 Subject: [PATCH 030/559] iio: accel: fxls8962af: Fix wakeup source leaks on device unbind Device can be unbound, so driver must also release memory for the wakeup source. Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250406-b4-device-wakeup-leak-iio-v1-1-2d7d322a4a93@linaro.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/fxls8962af-core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c index 48e4282964a0..bf1d3923a181 100644 --- a/drivers/iio/accel/fxls8962af-core.c +++ b/drivers/iio/accel/fxls8962af-core.c @@ -1226,8 +1226,11 @@ int fxls8962af_core_probe(struct device *dev, struct regmap *regmap, int irq) if (ret) return ret; - if (device_property_read_bool(dev, "wakeup-source")) - device_init_wakeup(dev, true); + if (device_property_read_bool(dev, "wakeup-source")) { + ret = devm_device_init_wakeup(dev); + if (ret) + return dev_err_probe(dev, ret, "Failed to init wakeup\n"); + } return devm_iio_device_register(dev, indio_dev); } From ad3764b45c1524872b621d5667a56f6a574501bd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 6 Apr 2025 22:01:43 +0200 Subject: [PATCH 031/559] iio: adc: qcom-spmi-iadc: Fix wakeup source leaks on device unbind Device can be unbound, so driver must also release memory for the wakeup source. Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250406-b4-device-wakeup-leak-iio-v1-2-2d7d322a4a93@linaro.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/qcom-spmi-iadc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/qcom-spmi-iadc.c b/drivers/iio/adc/qcom-spmi-iadc.c index 7fb8b2499a1d..b64a8a407168 100644 --- a/drivers/iio/adc/qcom-spmi-iadc.c +++ b/drivers/iio/adc/qcom-spmi-iadc.c @@ -543,7 +543,9 @@ static int iadc_probe(struct platform_device *pdev) else return ret; } else { - device_init_wakeup(iadc->dev, 1); + ret = devm_device_init_wakeup(iadc->dev); + if (ret) + return dev_err_probe(iadc->dev, ret, "Failed to init wakeup\n"); } ret = iadc_update_offset(iadc); From 4551383e78d59b34eea3f4ed28ad22df99e25d59 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 6 Apr 2025 22:01:44 +0200 Subject: [PATCH 032/559] iio: imu: st_lsm6dsx: Fix wakeup source leaks on device unbind Device can be unbound, so driver must also release memory for the wakeup source. Signed-off-by: Krzysztof Kozlowski Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250406-b4-device-wakeup-leak-iio-v1-3-2d7d322a4a93@linaro.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 4fdcc2acc94e..96c6106b95ee 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -2719,8 +2719,11 @@ int st_lsm6dsx_probe(struct device *dev, int irq, int hw_id, } if (device_property_read_bool(dev, "wakeup-source") || - (pdata && pdata->wakeup_source)) - device_init_wakeup(dev, true); + (pdata && pdata->wakeup_source)) { + err = devm_device_init_wakeup(dev); + if (err) + return dev_err_probe(dev, err, "Failed to init wakeup\n"); + } return 0; } From 1aa495a6572f8641da4ec4cd32210deca61bed64 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 11 Apr 2025 13:36:06 +0100 Subject: [PATCH 033/559] kunit: configs: Add some Cirrus Logic modules to all_tests Add CONFIG_I2C and CONFIG_SND_SOC_CS35L56_I2C to all_tests.config so that Cirrus Logic modules with KUnit tests will be built. The CS35L56 driver doesn't currently have any KUnit tests itself, but it enables two other libraries that have KUnit tests: cs_dsp and cs-amp-lib. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250411123608.1676462-2-rf@opensource.cirrus.com Reviewed-by: David Gow Signed-off-by: Mark Brown --- tools/testing/kunit/configs/all_tests.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index cdd9782f9646..43d3c31ab53f 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -20,6 +20,7 @@ CONFIG_VFAT_FS=y CONFIG_PCI=y CONFIG_USB4=y +CONFIG_I2C=y CONFIG_NET=y CONFIG_MCTP=y @@ -51,3 +52,4 @@ CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SOC=y CONFIG_SND_SOC_TOPOLOGY_BUILD=y +CONFIG_SND_SOC_CS35L56_I2C=y From 96014d91cffb335d3b396771524ff2aba3549865 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 11 Apr 2025 13:36:07 +0100 Subject: [PATCH 034/559] ASoC: cs-amp-lib-test: Don't select SND_SOC_CS_AMP_LIB Depend on SND_SOC_CS_AMP_LIB instead of selecting it. KUNIT_ALL_TESTS should only build tests for components that are already being built, it should not cause other stuff to be added to the build. Fixes: 177862317a98 ("ASoC: cs-amp-lib: Add KUnit test for calibration helpers") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250411123608.1676462-3-rf@opensource.cirrus.com Reviewed-by: David Gow Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 40bb7a1d44bc..20f99cbee29b 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -776,10 +776,9 @@ config SND_SOC_CS_AMP_LIB tristate config SND_SOC_CS_AMP_LIB_TEST - tristate "KUnit test for Cirrus Logic cs-amp-lib" - depends on KUNIT + tristate "KUnit test for Cirrus Logic cs-amp-lib" if !KUNIT_ALL_TESTS + depends on SND_SOC_CS_AMP_LIB && KUNIT default KUNIT_ALL_TESTS - select SND_SOC_CS_AMP_LIB help This builds KUnit tests for the Cirrus Logic common amplifier library. From a0b887f6eb9a0d1be3c57d00b0f3ba8408d3018a Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Fri, 11 Apr 2025 13:36:08 +0100 Subject: [PATCH 035/559] firmware: cs_dsp: tests: Depend on FW_CS_DSP rather then enabling it FW_CS_DSP gets enabled if KUNIT is enabled. The test should rather depend on if the feature is enabled. Fix this by moving FW_CS_DSP to the depends on clause. Fixes: dd0b6b1f29b9 ("firmware: cs_dsp: Add KUnit testing of bin file download") Signed-off-by: Nico Pache Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250411123608.1676462-4-rf@opensource.cirrus.com Reviewed-by: David Gow Signed-off-by: Mark Brown --- drivers/firmware/cirrus/Kconfig | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/firmware/cirrus/Kconfig b/drivers/firmware/cirrus/Kconfig index 0a883091259a..e3c2e38b746d 100644 --- a/drivers/firmware/cirrus/Kconfig +++ b/drivers/firmware/cirrus/Kconfig @@ -6,14 +6,11 @@ config FW_CS_DSP config FW_CS_DSP_KUNIT_TEST_UTILS tristate - depends on KUNIT && REGMAP - select FW_CS_DSP config FW_CS_DSP_KUNIT_TEST tristate "KUnit tests for Cirrus Logic cs_dsp" if !KUNIT_ALL_TESTS - depends on KUNIT && REGMAP + depends on KUNIT && REGMAP && FW_CS_DSP default KUNIT_ALL_TESTS - select FW_CS_DSP select FW_CS_DSP_KUNIT_TEST_UTILS help This builds KUnit tests for cs_dsp. From a9a69c3b38c89d7992fb53db4abb19104b531d32 Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Sun, 6 Apr 2025 16:08:54 -0500 Subject: [PATCH 036/559] ASoC: imx-card: Adjust over allocation of memory in imx_card_parse_of() Incorrect types are used as sizeof() arguments in devm_kcalloc(). It should be sizeof(dai_link_data) for link_data instead of sizeof(snd_soc_dai_link). This is found by our static analysis tool. Signed-off-by: Chenyuan Yang Link: https://patch.msgid.link/20250406210854.149316-1-chenyuan0y@gmail.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-card.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c index 3686d468506b..45e000f61ecc 100644 --- a/sound/soc/fsl/imx-card.c +++ b/sound/soc/fsl/imx-card.c @@ -544,7 +544,7 @@ static int imx_card_parse_of(struct imx_card_data *data) if (!card->dai_link) return -ENOMEM; - data->link_data = devm_kcalloc(dev, num_links, sizeof(*link), GFP_KERNEL); + data->link_data = devm_kcalloc(dev, num_links, sizeof(*link_data), GFP_KERNEL); if (!data->link_data) return -ENOMEM; From 9aff2e8df240e84a36f2607f98a0a9924a24e65d Mon Sep 17 00:00:00 2001 From: Sheetal Date: Fri, 4 Apr 2025 10:59:53 +0000 Subject: [PATCH 037/559] ASoC: soc-pcm: Fix hw_params() and DAPM widget sequence Issue: When multiple audio streams share a common BE DAI, the BE DAI widget can be powered up before its hardware parameters are configured. This incorrect sequence leads to intermittent pcm_write errors. For example, the below Tegra use-case throws an error: aplay(2 streams) -> AMX(mux) -> ADX(demux) -> arecord(2 streams), here, 'AMX TX' and 'ADX RX' are common BE DAIs. For above usecase when failure happens below sequence is observed: aplay(1) FE open() - BE DAI callbacks added to the list - BE DAI state = SND_SOC_DPCM_STATE_OPEN aplay(2) FE open() - BE DAI callbacks are not added to the list as the state is already SND_SOC_DPCM_STATE_OPEN during aplay(1) FE open(). aplay(2) FE hw_params() - BE DAI hw_params() callback ignored aplay(2) FE prepare() - Widget is powered ON without BE DAI hw_params() call aplay(1) FE hw_params() - BE DAI hw_params() is now called Fix: Add BE DAIs in the list if its state is either SND_SOC_DPCM_STATE_OPEN or SND_SOC_DPCM_STATE_HW_PARAMS as well. It ensures the widget is powered ON after BE DAI hw_params() callback. Fixes: 0c25db3f7621 ("ASoC: soc-pcm: Don't reconnect an already active BE") Signed-off-by: Sheetal Link: https://patch.msgid.link/20250404105953.2784819-1-sheetal@nvidia.com Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 4308a6cbb2e6..43835197d1fe 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1584,10 +1584,13 @@ int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream, /* * Filter for systems with 'component_chaining' enabled. * This helps to avoid unnecessary re-configuration of an - * already active BE on such systems. + * already active BE on such systems and ensures the BE DAI + * widget is powered ON after hw_params() BE DAI callback. */ if (fe->card->component_chaining && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_NEW) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_PARAMS) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_CLOSE)) continue; From 29bdc1f1c1df80868fb35bc69d1f073183adc6de Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Mon, 10 Mar 2025 07:44:09 -0500 Subject: [PATCH 038/559] book3s64/radix: Fix compile errors when CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=n Fix compile errors when CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=n Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Donet Tom Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/8231763344223c193e3452eab0ae8ea966aff466.1741609795.git.donettom@linux.ibm.com --- arch/powerpc/mm/book3s64/radix_pgtable.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 311e2112d782..bd6916419472 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -976,7 +976,7 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start, return 0; } - +#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) { if (radix_enabled()) @@ -984,6 +984,7 @@ bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) return false; } +#endif int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, unsigned long addr, unsigned long next) From 9cf7e13fecbab0894f6986fc6986ab2eba8de52e Mon Sep 17 00:00:00 2001 From: Donet Tom Date: Mon, 10 Mar 2025 07:44:10 -0500 Subject: [PATCH 039/559] book3s64/radix : Align section vmemmap start address to PAGE_SIZE A vmemmap altmap is a device-provided region used to provide backing storage for struct pages. For each namespace, the altmap should belong to that same namespace. If the namespaces are created unaligned, there is a chance that the section vmemmap start address could also be unaligned. If the section vmemmap start address is unaligned, the altmap page allocated from the current namespace might be used by the previous namespace also. During the free operation, since the altmap is shared between two namespaces, the previous namespace may detect that the page does not belong to its altmap and incorrectly assume that the page is a normal page. It then attempts to free the normal page, which leads to a kernel crash. Kernel attempted to read user page (18) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on read at 0x00000018 Faulting instruction address: 0xc000000000530c7c Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries CPU: 32 PID: 2104 Comm: ndctl Kdump: loaded Tainted: G W NIP: c000000000530c7c LR: c000000000530e00 CTR: 0000000000007ffe REGS: c000000015e57040 TRAP: 0300 Tainted: G W MSR: 800000000280b033 CR: 84482404 CFAR: c000000000530dfc DAR: 0000000000000018 DSISR: 40000000 IRQMASK: 0 GPR00: c000000000530e00 c000000015e572e0 c000000002c5cb00 c00c000101008040 GPR04: 0000000000000000 0000000000000007 0000000000000001 000000000000001f GPR08: 0000000000000005 0000000000000000 0000000000000018 0000000000002000 GPR12: c0000000001d2fb0 c0000060de6b0080 0000000000000000 c0000060dbf90020 GPR16: c00c000101008000 0000000000000001 0000000000000000 c000000125b20f00 GPR20: 0000000000000001 0000000000000000 ffffffffffffffff c00c000101007fff GPR24: 0000000000000001 0000000000000000 0000000000000000 0000000000000000 GPR28: 0000000004040201 0000000000000001 0000000000000000 c00c000101008040 NIP [c000000000530c7c] get_pfnblock_flags_mask+0x7c/0xd0 LR [c000000000530e00] free_unref_page_prepare+0x130/0x4f0 Call Trace: free_unref_page+0x50/0x1e0 free_reserved_page+0x40/0x68 free_vmemmap_pages+0x98/0xe0 remove_pte_table+0x164/0x1e8 remove_pmd_table+0x204/0x2c8 remove_pud_table+0x1c4/0x288 remove_pagetable+0x1c8/0x310 vmemmap_free+0x24/0x50 section_deactivate+0x28c/0x2a0 __remove_pages+0x84/0x110 arch_remove_memory+0x38/0x60 memunmap_pages+0x18c/0x3d0 devm_action_release+0x30/0x50 release_nodes+0x68/0x140 devres_release_group+0x100/0x190 dax_pmem_compat_release+0x44/0x80 [dax_pmem_compat] device_for_each_child+0x8c/0x100 [dax_pmem_compat_remove+0x2c/0x50 [dax_pmem_compat] nvdimm_bus_remove+0x78/0x140 [libnvdimm] device_remove+0x70/0xd0 Another issue is that if there is no altmap, a PMD-sized vmemmap page will be allocated from RAM, regardless of the alignment of the section start address. If the section start address is not aligned to the PMD size, a VM_BUG_ON will be triggered when setting the PMD-sized page to page table. In this patch, we are aligning the section vmemmap start address to PAGE_SIZE. After alignment, the start address will not be part of the current namespace, and a normal page will be allocated for the vmemmap mapping of the current section. For the remaining sections, altmaps will be allocated. During the free operation, the normal page will be correctly freed. In the same way, a PMD_SIZE vmemmap page will be allocated only if the section start address is PMD_SIZE-aligned; otherwise, it will fall back to a PAGE-sized vmemmap allocation. Without this patch ================== NS1 start NS2 start _________________________________________________________ | NS1 | NS2 | --------------------------------------------------------- | Altmap| Altmap | .....|Altmap| Altmap | ........... | NS1 | NS1 | | NS2 | NS2 | In the above scenario, NS1 and NS2 are two namespaces. The vmemmap for NS1 comes from Altmap NS1, which belongs to NS1, and the vmemmap for NS2 comes from Altmap NS2, which belongs to NS2. The vmemmap start for NS2 is not aligned, so Altmap NS2 is shared by both NS1 and NS2. During the free operation in NS1, Altmap NS2 is not part of NS1's altmap, causing it to attempt to free an invalid page. With this patch =============== NS1 start NS2 start _________________________________________________________ | NS1 | NS2 | --------------------------------------------------------- | Altmap| Altmap | .....| Normal | Altmap | Altmap |....... | NS1 | NS1 | | Page | NS2 | NS2 | If the vmemmap start for NS2 is not aligned then we are allocating a normal page. NS1 and NS2 vmemmap will be freed correctly. Fixes: 368a0590d954 ("powerpc/book3s64/vmemmap: switch radix to use a different vmemmap handling function") Co-developed-by: Ritesh Harjani (IBM) Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Donet Tom Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/8f98ec2b442977c618f7256cec88eb17dde3f2b9.1741609795.git.donettom@linux.ibm.com --- arch/powerpc/mm/book3s64/radix_pgtable.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index bd6916419472..9f764bc42b8c 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1121,6 +1121,19 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, in pmd_t *pmd; pte_t *pte; + /* + * Make sure we align the start vmemmap addr so that we calculate + * the correct start_pfn in altmap boundary check to decided whether + * we should use altmap or RAM based backing memory allocation. Also + * the address need to be aligned for set_pte operation. + + * If the start addr is already PMD_SIZE aligned we will try to use + * a pmd mapping. We don't want to be too aggressive here beacause + * that will cause more allocations in RAM. So only if the namespace + * vmemmap start addr is PMD_SIZE aligned we will use PMD mapping. + */ + + start = ALIGN_DOWN(start, PAGE_SIZE); for (addr = start; addr < end; addr = next) { next = pmd_addr_end(addr, end); @@ -1146,8 +1159,8 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, in * in altmap block allocation failures, in which case * we fallback to RAM for vmemmap allocation. */ - if (altmap && (!IS_ALIGNED(addr, PMD_SIZE) || - altmap_cross_boundary(altmap, addr, PMD_SIZE))) { + if (!IS_ALIGNED(addr, PMD_SIZE) || (altmap && + altmap_cross_boundary(altmap, addr, PMD_SIZE))) { /* * make sure we don't create altmap mappings * covering things outside the device. From 534f5a8ba27863141e29766467a3e1f61bcb47ac Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Wed, 5 Feb 2025 00:18:21 +0100 Subject: [PATCH 040/559] powerpc64/ftrace: fix module loading without patchable function entries get_stubs_size assumes that there must always be at least one patchable function entry, which is not always the case (modules that export data but no code), otherwise it returns -ENOEXEC and thus the section header sh_size is set to that value. During module_memory_alloc() the size is passed to execmem_alloc() after being page-aligned and thus set to zero which will cause it to fail the allocation (and thus module loading) as __vmalloc_node_range() checks for zero-sized allocs and returns null: [ 115.466896] module_64: cast_common: doesn't contain __patchable_function_entries. [ 115.469189] ------------[ cut here ]------------ [ 115.469496] WARNING: CPU: 0 PID: 274 at mm/vmalloc.c:3778 __vmalloc_node_range_noprof+0x8b4/0x8f0 ... [ 115.478574] ---[ end trace 0000000000000000 ]--- [ 115.479545] execmem: unable to allocate memory Fix this by removing the check completely, since it is anyway not helpful to propagate this as an error upwards. Fixes: eec37961a56a ("powerpc64/ftrace: Move ftrace sequence out of line") Signed-off-by: Anthony Iliopoulos Acked-by: Naveen N Rao (AMD) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250204231821.39140-1-ailiop@suse.com --- arch/powerpc/kernel/module_64.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 34a5aec4908f..126bf3b06ab7 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -258,10 +258,6 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, break; } } - if (i == hdr->e_shnum) { - pr_err("%s: doesn't contain __patchable_function_entries.\n", me->name); - return -ENOEXEC; - } #endif pr_debug("Looks like a total of %lu stubs, max\n", relocs); From 3700976f2ae8dfec4c17433f8a16c9e6c334cf89 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 7 Apr 2025 14:10:29 +0530 Subject: [PATCH 041/559] powerpc: Add check to select PPC_RADIX_BROADCAST_TLBIE Commit 3d45a3d0d2e6 ("powerpc: Define config option for processors with broadcast TLBIE") added a config option PPC_RADIX_BROADCAST_TLBIE to support processors with broadcast TLBIE. Since this option is relevant only for RADIX_MMU, add a check as a dependency to enable PPC_RADIX_BROADCAST_TLBIE in both powernv and pseries configs. This fixes the unmet config dependency warning reported WARNING: unmet direct dependencies detected for PPC_RADIX_BROADCAST_TLBIE Depends on [n]: PPC_RADIX_MMU [=n] Selected by [y]: - PPC_PSERIES [=y] && PPC64 [=y] && PPC_BOOK3S [=y] Reported-by: kernel test robot Tested-by: Venkat Rao Bagalkote Reviewed-by: Ritesh Harjani (IBM) Closes: https://lore.kernel.org/oe-kbuild-all/202504051857.jRqxM60c-lkp@intel.com/ Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250407084029.357710-1-maddy@linux.ibm.com --- arch/powerpc/platforms/powernv/Kconfig | 2 +- arch/powerpc/platforms/pseries/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 3fbe0295ce14..95d7ba73d43d 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -17,7 +17,7 @@ config PPC_POWERNV select MMU_NOTIFIER select FORCE_SMP select ARCH_SUPPORTS_PER_VMA_LOCK - select PPC_RADIX_BROADCAST_TLBIE + select PPC_RADIX_BROADCAST_TLBIE if PPC_RADIX_MMU default y config OPAL_PRD diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index a934c2a262f6..fa3c2fff082a 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -23,7 +23,7 @@ config PPC_PSERIES select FORCE_SMP select SWIOTLB select ARCH_SUPPORTS_PER_VMA_LOCK - select PPC_RADIX_BROADCAST_TLBIE + select PPC_RADIX_BROADCAST_TLBIE if PPC_RADIX_MMU default y config PARAVIRT From e64c0ff0d5d85791fbcd126ee558100a06a24a97 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 27 Mar 2025 11:16:00 +0800 Subject: [PATCH 042/559] pinctrl: imx: Return NULL if no group is matched and found Currently if no group is matched and found, this function will return the last grp to the caller, this is not expected, it is supposed to return NULL in this case. Fixes: e566fc11ea76 ("pinctrl: imx: use generic pinctrl helpers for managing groups") Signed-off-by: Hui Wang Reviewed-by: Frank Li Link: https://lore.kernel.org/20250327031600.99723-1-hui.wang@canonical.com Signed-off-by: Linus Walleij --- drivers/pinctrl/freescale/pinctrl-imx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c index 842a1e6cbfc4..18de31328540 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx.c +++ b/drivers/pinctrl/freescale/pinctrl-imx.c @@ -37,16 +37,16 @@ static inline const struct group_desc *imx_pinctrl_find_group_by_name( struct pinctrl_dev *pctldev, const char *name) { - const struct group_desc *grp = NULL; + const struct group_desc *grp; int i; for (i = 0; i < pctldev->num_groups; i++) { grp = pinctrl_generic_get_group(pctldev, i); if (grp && !strcmp(grp->grp.name, name)) - break; + return grp; } - return grp; + return NULL; } static void imx_pin_dbg_show(struct pinctrl_dev *pctldev, struct seq_file *s, From e56088a13708757da68ad035269d69b93ac8c389 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 29 Mar 2025 20:01:32 +0100 Subject: [PATCH 043/559] pinctrl: meson: define the pull up/down resistor value as 60 kOhm The public datasheets of the following Amlogic SoCs describe a typical resistor value for the built-in pull up/down resistor: - Meson8/8b/8m2: not documented - GXBB (S905): 60 kOhm - GXL (S905X): 60 kOhm - GXM (S912): 60 kOhm - G12B (S922X): 60 kOhm - SM1 (S905D3): 60 kOhm The public G12B and SM1 datasheets additionally state min and max values: - min value: 50 kOhm for both, pull-up and pull-down - max value for the pull-up: 70 kOhm - max value for the pull-down: 130 kOhm Use 60 kOhm in the pinctrl-meson driver as well so it's shown in the debugfs output. It may not be accurate for Meson8/8b/8m2 but in reality 60 kOhm is closer to the actual value than 1 Ohm. Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/20250329190132.855196-1-martin.blumenstingl@googlemail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 253a0cc57e39..e5a32a0532ee 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -487,7 +487,7 @@ static int meson_pinconf_get(struct pinctrl_dev *pcdev, unsigned int pin, case PIN_CONFIG_BIAS_PULL_DOWN: case PIN_CONFIG_BIAS_PULL_UP: if (meson_pinconf_get_pull(pc, pin) == param) - arg = 1; + arg = 60000; else return -EINVAL; break; From 457d9772e8a5cdae64f66b5f7d5b0247365191ec Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Tue, 1 Apr 2025 15:50:21 +0200 Subject: [PATCH 044/559] pinctrl: airoha: fix wrong PHY LED mapping and PHY2 LED defines The current PHY2 LED define are wrong and actually set BITs outside the related mask. Fix it and set the correct value. While at it, also use FIELD_PREP_CONST macro to make it simple to understand what values are actually applied for the mask. Also fix wrong PHY LED mapping. The SoC Switch supports up to 4 port but the register define mapping for 5 PHY port, starting from 0. The mapping was wrongly defined starting from PHY1. Reorder the function group to start from PHY0. PHY4 is actually never supported as we don't have a GPIO pin to assign. Cc: stable@vger.kernel.org Fixes: 1c8ace2d0725 ("pinctrl: airoha: Add support for EN7581 SoC") Reviewed-by: Benjamin Larsson Signed-off-by: Christian Marangi Acked-by: Lorenzo Bianconi Link: https://lore.kernel.org/20250401135026.18018-1-ansuelsmth@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-airoha.c | 159 ++++++++++------------ 1 file changed, 70 insertions(+), 89 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-airoha.c b/drivers/pinctrl/mediatek/pinctrl-airoha.c index 547a798b71c8..5d84a778683d 100644 --- a/drivers/pinctrl/mediatek/pinctrl-airoha.c +++ b/drivers/pinctrl/mediatek/pinctrl-airoha.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -112,39 +113,19 @@ #define REG_LAN_LED1_MAPPING 0x0280 #define LAN4_LED_MAPPING_MASK GENMASK(18, 16) -#define LAN4_PHY4_LED_MAP BIT(18) -#define LAN4_PHY2_LED_MAP BIT(17) -#define LAN4_PHY1_LED_MAP BIT(16) -#define LAN4_PHY0_LED_MAP 0 -#define LAN4_PHY3_LED_MAP GENMASK(17, 16) +#define LAN4_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN4_LED_MAPPING_MASK, (_n)) #define LAN3_LED_MAPPING_MASK GENMASK(14, 12) -#define LAN3_PHY4_LED_MAP BIT(14) -#define LAN3_PHY2_LED_MAP BIT(13) -#define LAN3_PHY1_LED_MAP BIT(12) -#define LAN3_PHY0_LED_MAP 0 -#define LAN3_PHY3_LED_MAP GENMASK(13, 12) +#define LAN3_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN3_LED_MAPPING_MASK, (_n)) #define LAN2_LED_MAPPING_MASK GENMASK(10, 8) -#define LAN2_PHY4_LED_MAP BIT(12) -#define LAN2_PHY2_LED_MAP BIT(11) -#define LAN2_PHY1_LED_MAP BIT(10) -#define LAN2_PHY0_LED_MAP 0 -#define LAN2_PHY3_LED_MAP GENMASK(11, 10) +#define LAN2_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN2_LED_MAPPING_MASK, (_n)) #define LAN1_LED_MAPPING_MASK GENMASK(6, 4) -#define LAN1_PHY4_LED_MAP BIT(6) -#define LAN1_PHY2_LED_MAP BIT(5) -#define LAN1_PHY1_LED_MAP BIT(4) -#define LAN1_PHY0_LED_MAP 0 -#define LAN1_PHY3_LED_MAP GENMASK(5, 4) +#define LAN1_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN1_LED_MAPPING_MASK, (_n)) #define LAN0_LED_MAPPING_MASK GENMASK(2, 0) -#define LAN0_PHY4_LED_MAP BIT(3) -#define LAN0_PHY2_LED_MAP BIT(2) -#define LAN0_PHY1_LED_MAP BIT(1) -#define LAN0_PHY0_LED_MAP 0 -#define LAN0_PHY3_LED_MAP GENMASK(2, 1) +#define LAN0_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN0_LED_MAPPING_MASK, (_n)) /* CONF */ #define REG_I2C_SDA_E2 0x001c @@ -1476,8 +1457,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY1_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1491,8 +1472,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY1_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1506,8 +1487,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY1_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1521,8 +1502,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY1_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(0) }, .regmap_size = 2, }, @@ -1540,8 +1521,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY2_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1555,8 +1536,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY2_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1570,8 +1551,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY2_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1585,8 +1566,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY2_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(1) }, .regmap_size = 2, }, @@ -1604,8 +1585,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY3_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1619,8 +1600,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY3_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1634,8 +1615,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY3_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1649,8 +1630,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY3_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(2) }, .regmap_size = 2, }, @@ -1668,8 +1649,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY4_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1683,8 +1664,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY4_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1698,8 +1679,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY4_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1713,8 +1694,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY4_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(3) }, .regmap_size = 2, }, @@ -1732,8 +1713,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY1_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1747,8 +1728,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY1_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1762,8 +1743,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY1_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1777,8 +1758,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY1_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(0) }, .regmap_size = 2, }, @@ -1796,8 +1777,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY2_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1811,8 +1792,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY2_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1826,8 +1807,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY2_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1841,8 +1822,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY2_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(1) }, .regmap_size = 2, }, @@ -1860,8 +1841,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY3_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1875,8 +1856,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY3_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1890,8 +1871,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY3_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1905,8 +1886,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY3_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(2) }, .regmap_size = 2, }, @@ -1924,8 +1905,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY4_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1939,8 +1920,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY4_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1954,8 +1935,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY4_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1969,8 +1950,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY4_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(3) }, .regmap_size = 2, }, From 63ec4baf725cbde506f0a9640ae6751622b81b0a Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 15 Apr 2025 13:29:27 +0100 Subject: [PATCH 045/559] ASoC: Add Cirrus and Wolfson headers to ASoC section of MAINTAINERS Specifically list various Cirrus Logic and Wolfson Micro codec header files under include/sound/ within the ASoC section of MAINTAINERS. Note that not all the include/sound/cs* files are part of ASoC, so more-specific patterns are needed. These files are all part of ASoC codec drivers, and are owned by specific Cirrus Logic and Wolfson Micro sections of MAINTAINERS. But the overall include/sound/* maintainership is only Takashi Iwai and Jaroslav Kysela. So by default get_maintainer.pl would only show Takashi and Jaroslav as maintainers for any patch that changes these files without changing any code under sound/soc. There is a separate MAINTAINERS section for ASoC, so the headers must be added there to make the ASoC maintainers show up in get_maintainer.pl. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250415122927.512200-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ad08ca0f423b..4878b77f71ee 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22656,9 +22656,15 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git F: Documentation/devicetree/bindings/sound/ F: Documentation/sound/soc/ F: include/dt-bindings/sound/ +F: include/sound/cs-amp-lib.h +F: include/sound/cs35l* +F: include/sound/cs4271.h +F: include/sound/cs42l* +F: include/sound/madera-pdata.h F: include/sound/soc* F: include/sound/sof.h F: include/sound/sof/ +F: include/sound/wm*.h F: include/trace/events/sof*.h F: include/uapi/sound/asoc.h F: sound/soc/ From b2accfe7ca5bc9f9af28e603b79bdd5ad8df5c0b Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Tue, 1 Apr 2025 06:12:18 +0530 Subject: [PATCH 046/559] powerpc/boot: Check for ld-option support Commit 579aee9fc594 ("powerpc: suppress some linker warnings in recent linker versions") enabled support to add linker option "--no-warn-rwx-segments", if the version is greater than 2.39. Similar build warning were reported recently from linker version 2.35.2. ld: warning: arch/powerpc/boot/zImage.epapr has a LOAD segment with RWX permissions ld: warning: arch/powerpc/boot/zImage.pseries has a LOAD segment with RWX permissions Fix the warning by checking for "--no-warn-rwx-segments" option support in linker to enable it, instead of checking for the version range. Fixes: 579aee9fc594 ("powerpc: suppress some linker warnings in recent linker versions") Reported-by: Venkat Rao Bagalkote Suggested-by: Christophe Leroy Tested-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/linuxppc-dev/61cf556c-4947-4bd6-af63-892fc0966dad@linux.ibm.com/ Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250401004218.24869-1-maddy@linux.ibm.com --- arch/powerpc/boot/wrapper | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 1db60fe13802..267ca6d4d9b3 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -234,10 +234,8 @@ fi # suppress some warnings in recent ld versions nowarn="-z noexecstack" -if ! ld_is_lld; then - if [ "$LD_VERSION" -ge "$(echo 2.39 | ld_version)" ]; then - nowarn="$nowarn --no-warn-rwx-segments" - fi +if [ $(${CROSS}ld -v --no-warn-rwx-segments &>/dev/null; echo $?) -eq 0 ]; then + nowarn="$nowarn --no-warn-rwx-segments" fi platformo=$object/"$platform".o From bbc9462f0cb0c8917a4908e856731708f0cee910 Mon Sep 17 00:00:00 2001 From: Shyam Saini Date: Thu, 27 Feb 2025 10:49:27 -0800 Subject: [PATCH 047/559] kernel: param: rename locate_module_kobject The locate_module_kobject() function looks up an existing module_kobject for a given module name. If it cannot find the corresponding module_kobject, it creates one for the given name. This commit renames locate_module_kobject() to lookup_or_create_module_kobject() to better describe its operations. This doesn't change anything functionality wise. Suggested-by: Rasmus Villemoes Signed-off-by: Shyam Saini Link: https://lore.kernel.org/r/20250227184930.34163-2-shyamsaini@linux.microsoft.com Signed-off-by: Petr Pavlu --- kernel/params.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/params.c b/kernel/params.c index 2509f216c9f3..a2441ce059ae 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -760,7 +760,7 @@ void destroy_params(const struct kernel_param *params, unsigned num) params[i].ops->free(params[i].arg); } -static struct module_kobject * __init locate_module_kobject(const char *name) +static struct module_kobject * __init lookup_or_create_module_kobject(const char *name) { struct module_kobject *mk; struct kobject *kobj; @@ -802,7 +802,7 @@ static void __init kernel_add_sysfs_param(const char *name, struct module_kobject *mk; int err; - mk = locate_module_kobject(name); + mk = lookup_or_create_module_kobject(name); if (!mk) return; @@ -873,7 +873,7 @@ static void __init version_sysfs_builtin(void) int err; for (vattr = __start___modver; vattr < __stop___modver; vattr++) { - mk = locate_module_kobject(vattr->module_name); + mk = lookup_or_create_module_kobject(vattr->module_name); if (mk) { err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); WARN_ON_ONCE(err); From 1c7777feb0e2f5925908c489513656ebb443a699 Mon Sep 17 00:00:00 2001 From: Shyam Saini Date: Thu, 27 Feb 2025 10:49:28 -0800 Subject: [PATCH 048/559] kernel: refactor lookup_or_create_module_kobject() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the unlikely event of the allocation failing, it is better to let the machine boot with a not fully populated sysfs than to kill it with this BUG_ON(). All callers are already prepared for lookup_or_create_module_kobject() returning NULL. This is also preparation for calling this function from non __init code, where using BUG_ON for allocation failure handling is not acceptable. Since we are here, also start using IS_ENABLED instead of #ifdef construct. Suggested-by: Thomas Weißschuh Suggested-by: Rasmus Villemoes Signed-off-by: Shyam Saini Link: https://lore.kernel.org/r/20250227184930.34163-3-shyamsaini@linux.microsoft.com Signed-off-by: Petr Pavlu --- kernel/params.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/kernel/params.c b/kernel/params.c index a2441ce059ae..787662663e34 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -767,31 +767,28 @@ static struct module_kobject * __init lookup_or_create_module_kobject(const char int err; kobj = kset_find_obj(module_kset, name); - if (kobj) { - mk = to_module_kobject(kobj); - } else { - mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); - BUG_ON(!mk); + if (kobj) + return to_module_kobject(kobj); - mk->mod = THIS_MODULE; - mk->kobj.kset = module_kset; - err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, - "%s", name); -#ifdef CONFIG_MODULES - if (!err) - err = sysfs_create_file(&mk->kobj, &module_uevent.attr); -#endif - if (err) { - kobject_put(&mk->kobj); - pr_crit("Adding module '%s' to sysfs failed (%d), the system may be unstable.\n", - name, err); - return NULL; - } + mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); + if (!mk) + return NULL; - /* So that we hold reference in both cases. */ - kobject_get(&mk->kobj); + mk->mod = THIS_MODULE; + mk->kobj.kset = module_kset; + err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, "%s", name); + if (IS_ENABLED(CONFIG_MODULES) && !err) + err = sysfs_create_file(&mk->kobj, &module_uevent.attr); + if (err) { + kobject_put(&mk->kobj); + pr_crit("Adding module '%s' to sysfs failed (%d), the system may be unstable.\n", + name, err); + return NULL; } + /* So that we hold reference in both cases. */ + kobject_get(&mk->kobj); + return mk; } From 7c76c813cfc42a7376378a0c4b7250db2eebab81 Mon Sep 17 00:00:00 2001 From: Shyam Saini Date: Thu, 27 Feb 2025 10:49:29 -0800 Subject: [PATCH 049/559] kernel: globalize lookup_or_create_module_kobject() lookup_or_create_module_kobject() is marked as static and __init, to make it global drop static keyword. Since this function can be called from non-init code, use __modinit instead of __init, __modinit marker will make it __init if CONFIG_MODULES is not defined. Suggested-by: Rasmus Villemoes Signed-off-by: Shyam Saini Link: https://lore.kernel.org/r/20250227184930.34163-4-shyamsaini@linux.microsoft.com Signed-off-by: Petr Pavlu --- include/linux/module.h | 2 ++ kernel/params.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/module.h b/include/linux/module.h index d94b196d5a34..b3329110d668 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -162,6 +162,8 @@ extern void cleanup_module(void); #define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ +struct module_kobject *lookup_or_create_module_kobject(const char *name); + /* Generic info of form tag = "info" */ #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) diff --git a/kernel/params.c b/kernel/params.c index 787662663e34..e668fc90b83e 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -760,7 +760,7 @@ void destroy_params(const struct kernel_param *params, unsigned num) params[i].ops->free(params[i].arg); } -static struct module_kobject * __init lookup_or_create_module_kobject(const char *name) +struct module_kobject __modinit * lookup_or_create_module_kobject(const char *name) { struct module_kobject *mk; struct kobject *kobj; From 68715cb5c0e00284d93f976c6368809f64131b0b Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Tue, 15 Apr 2025 14:41:34 -0500 Subject: [PATCH 050/559] ASoC: Intel: sof_sdw: Add NULL check in asoc_sdw_rt_dmic_rtd_init() mic_name returned by devm_kasprintf() could be NULL. Add a check for it. Signed-off-by: Chenyuan Yang Fixes: bee2fe44679f ("ASoC: Intel: sof_sdw: use generic rtd_init function for Realtek SDW DMICs") Link: https://patch.msgid.link/20250415194134.292830-1-chenyuan0y@gmail.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_rt_dmic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sdw_utils/soc_sdw_rt_dmic.c b/sound/soc/sdw_utils/soc_sdw_rt_dmic.c index 46d917a99c51..97be110a59b6 100644 --- a/sound/soc/sdw_utils/soc_sdw_rt_dmic.c +++ b/sound/soc/sdw_utils/soc_sdw_rt_dmic.c @@ -29,6 +29,8 @@ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_da mic_name = devm_kasprintf(card->dev, GFP_KERNEL, "rt715-sdca"); else mic_name = devm_kasprintf(card->dev, GFP_KERNEL, "%s", component->name_prefix); + if (!mic_name) + return -ENOMEM; card->components = devm_kasprintf(card->dev, GFP_KERNEL, "%s mic:%s", card->components, From f95bbfe18512c5c018720468959edac056a17196 Mon Sep 17 00:00:00 2001 From: Shyam Saini Date: Thu, 27 Feb 2025 10:49:30 -0800 Subject: [PATCH 051/559] drivers: base: handle module_kobject creation module_add_driver() relies on module_kset list for /sys/module//drivers directory creation. Since, commit 96a1a2412acba ("kernel/params.c: defer most of param_sysfs_init() to late_initcall time") drivers which are initialized from subsys_initcall() or any other higher precedence initcall couldn't find the related kobject entry in the module_kset list because module_kset is not fully populated by the time module_add_driver() refers it. As a consequence, module_add_driver() returns early without calling make_driver_name(). Therefore, /sys/module//drivers is never created. Fix this issue by letting module_add_driver() handle module_kobject creation itself. Fixes: 96a1a2412acb ("kernel/params.c: defer most of param_sysfs_init() to late_initcall time") Cc: stable@vger.kernel.org # requires all other patches from the series Suggested-by: Rasmus Villemoes Signed-off-by: Shyam Saini Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250227184930.34163-5-shyamsaini@linux.microsoft.com Signed-off-by: Petr Pavlu --- drivers/base/module.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/base/module.c b/drivers/base/module.c index 5bc71bea883a..218aaa096455 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -42,16 +42,13 @@ int module_add_driver(struct module *mod, const struct device_driver *drv) if (mod) mk = &mod->mkobj; else if (drv->mod_name) { - struct kobject *mkobj; - - /* Lookup built-in module entry in /sys/modules */ - mkobj = kset_find_obj(module_kset, drv->mod_name); - if (mkobj) { - mk = container_of(mkobj, struct module_kobject, kobj); + /* Lookup or create built-in module entry in /sys/modules */ + mk = lookup_or_create_module_kobject(drv->mod_name); + if (mk) { /* remember our module structure */ drv->p->mkobj = mk; - /* kset_find_obj took a reference */ - kobject_put(mkobj); + /* lookup_or_create_module_kobject took a reference */ + kobject_put(&mk->kobj); } } From fe412e3a6c97cfe49ecf4564a278122f7d78e3f0 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 15 Apr 2025 19:23:37 +0800 Subject: [PATCH 052/559] pinctrl: mediatek: common-v1: Fix EINT breakage on older controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When EINT support for multiple addresses was introduced, the driver library for the older generations (pinctrl-mtk-common) was not fixed together. This resulted in invalid pointer accesses. Fix up the filled in |struct mtk_eint| in pinctrl-mtk-common to match what is now expected by the mtk-eint library. Reported-by: Uwe Kleine-König Tested-by: Uwe Kleine-König Closes: https://lore.kernel.org/all/43nd5jxpk7b7fv46frqlfjnqfh5jlpqsemeoakqzd4wdi3df6y@w7ycd3k5ezvn/ Fixes: 3ef9f710efcb ("pinctrl: mediatek: Add EINT support for multiple addresses") Cc: Hao Chang Cc: Qingliang Li Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/20250415112339.2385454-1-wenst@chromium.org Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 91edb539925a..7585de11854b 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1015,7 +1015,13 @@ static int mtk_eint_init(struct mtk_pinctrl *pctl, struct platform_device *pdev) if (!pctl->eint) return -ENOMEM; - pctl->eint->base = devm_platform_ioremap_resource(pdev, 0); + pctl->eint->nbase = 1; + /* mtk-eint expects an array */ + pctl->eint->base = devm_kzalloc(pctl->dev, sizeof(pctl->eint->base), GFP_KERNEL); + if (IS_ERR(pctl->eint->base)) + return -ENOMEM; + + pctl->eint->base[0] = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(pctl->eint->base)) return PTR_ERR(pctl->eint->base); From 9f5595d5f03fd4dc640607a71e89a1daa68fd19d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 14 Apr 2025 11:24:00 -0500 Subject: [PATCH 053/559] platform/x86/amd: pmc: Require at least 2.5 seconds between HW sleep cycles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an APU exits HW sleep with no active wake sources the Linux kernel will rapidly assert that the APU can enter back into HW sleep. This happens in a few ms. Contrasting this to Windows, Windows can take 10s of seconds to enter back into the resiliency phase for Modern Standby. For some situations this can be problematic because it can cause leakage from VDDCR_SOC to VDD_MISC and force VDD_MISC outside of the electrical design guide specifications. On some designs this will trip the over voltage protection feature (OVP) of the voltage regulator module, but it could cause APU damage as well. To prevent this risk, add an explicit sleep call so that future attempts to enter into HW sleep will have enough time to settle. This will occur while the screen is dark and only on cases that the APU should enter HW sleep again, so it shouldn't be noticeable to any user. Cc: stable@vger.kernel.org Signed-off-by: Mario Limonciello Acked-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20250414162446.3853194-1-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmc/pmc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index d789d6cab794..0329fafe14eb 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -644,10 +644,9 @@ static void amd_pmc_s2idle_check(void) struct smu_metrics table; int rc; - /* CZN: Ensure that future s0i3 entry attempts at least 10ms passed */ - if (pdev->cpu_id == AMD_CPU_ID_CZN && !get_metrics_table(pdev, &table) && - table.s0i3_last_entry_status) - usleep_range(10000, 20000); + /* Avoid triggering OVP */ + if (!get_metrics_table(pdev, &table) && table.s0i3_last_entry_status) + msleep(2500); /* Dump the IdleMask before we add to the STB */ amd_pmc_idlemask_read(pdev, pdev->dev, NULL); From 8d6955ed76e8a47115f2ea1d9c263ee6f505d737 Mon Sep 17 00:00:00 2001 From: Shouye Liu Date: Thu, 17 Apr 2025 11:23:21 +0800 Subject: [PATCH 054/559] platform/x86/intel-uncore-freq: Fix missing uncore sysfs during CPU hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In certain situations, the sysfs for uncore may not be present when all CPUs in a package are offlined and then brought back online after boot. This issue can occur if there is an error in adding the sysfs entry due to a memory allocation failure. Retrying to bring the CPUs online will not resolve the issue, as the uncore_cpu_mask is already set for the package before the failure condition occurs. This issue does not occur if the failure happens during module initialization, as the module will fail to load in the event of any error. To address this, ensure that the uncore_cpu_mask is not set until the successful return of uncore_freq_add_entry(). Fixes: dbce412a7733 ("platform/x86/intel-uncore-freq: Split common and enumeration part") Signed-off-by: Shouye Liu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250417032321.75580-1-shouyeliu@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../x86/intel/uncore-frequency/uncore-frequency.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c index 40bbf8e45fa4..bdee5d00f30b 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c @@ -146,15 +146,13 @@ static int uncore_event_cpu_online(unsigned int cpu) { struct uncore_data *data; int target; + int ret; /* Check if there is an online cpu in the package for uncore MSR */ target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); if (target < nr_cpu_ids) return 0; - /* Use this CPU on this die as a control CPU */ - cpumask_set_cpu(cpu, &uncore_cpu_mask); - data = uncore_get_instance(cpu); if (!data) return 0; @@ -163,7 +161,14 @@ static int uncore_event_cpu_online(unsigned int cpu) data->die_id = topology_die_id(cpu); data->domain_id = UNCORE_DOMAIN_ID_INVALID; - return uncore_freq_add_entry(data, cpu); + ret = uncore_freq_add_entry(data, cpu); + if (ret) + return ret; + + /* Use this CPU on this die as a control CPU */ + cpumask_set_cpu(cpu, &uncore_cpu_mask); + + return 0; } static int uncore_event_cpu_offline(unsigned int cpu) From 4a8e04e2bdcb98d513e97b039899bda03b07bcf2 Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Wed, 16 Apr 2025 13:50:23 -0300 Subject: [PATCH 055/559] platform/x86: alienware-wmi-wmax: Fix uninitialized variable due to bad error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wmax_thermal_information() may also return -ENOMSG, which would leave `id` uninitialized in thermal_profile_probe. Reorder and modify logic to catch all errors. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/Z_-KVqNbD9ygvE2X@stanley.mountain Fixes: 27e9e6339896 ("platform/x86: alienware-wmi: Refactor thermal control methods") Signed-off-by: Kurt Borja Link: https://lore.kernel.org/r/20250416-smatch-fix-v1-1-35491b462d8f@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/alienware-wmi-wmax.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index 0c3be03385f8..3f9e1e986ecf 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -655,12 +655,10 @@ static int thermal_profile_probe(void *drvdata, unsigned long *choices) for (u32 i = 0; i < sys_desc[3]; i++) { ret = wmax_thermal_information(priv->wdev, WMAX_OPERATION_LIST_IDS, i + first_mode, &out_data); - - if (ret == -EIO) - return ret; - if (ret == -EBADRQC) break; + if (ret) + return ret; if (!is_wmax_thermal_code(out_data)) continue; From 45e00e36718902d81bdaebb37b3a8244e685bc48 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V (Arm)" Date: Tue, 8 Apr 2025 09:03:51 +0530 Subject: [PATCH 056/559] iommu/arm-smmu-v3: Add missing S2FWB feature detection Commit 67e4fe398513 ("iommu/arm-smmu-v3: Use S2FWB for NESTED domains") introduced S2FWB usage but omitted the corresponding feature detection. As a result, vIOMMU allocation fails on FVP in arm_vsmmu_alloc(), due to the following check: if (!arm_smmu_master_canwbs(master) && !(smmu->features & ARM_SMMU_FEAT_S2FWB)) return ERR_PTR(-EOPNOTSUPP); This patch adds the missing detection logic to prevent allocation failure when S2FWB is supported. Fixes: 67e4fe398513 ("iommu/arm-smmu-v3: Use S2FWB for NESTED domains") Signed-off-by: Aneesh Kumar K.V (Arm) Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Reviewed-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20250408033351.1012411-1-aneesh.kumar@kernel.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index b4c21aaed126..5467f85dd463 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -4429,6 +4429,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3); if (FIELD_GET(IDR3_RIL, reg)) smmu->features |= ARM_SMMU_FEAT_RANGE_INV; + if (FIELD_GET(IDR3_FWB, reg)) + smmu->features |= ARM_SMMU_FEAT_S2FWB; /* IDR5 */ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5); From 12f78021973ae422564b234136c702a305932d73 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Sat, 12 Apr 2025 10:23:54 +1000 Subject: [PATCH 057/559] iommu/arm-smmu-v3: Fix pgsize_bit for sva domains UBSan caught a bug with IOMMU SVA domains, where the reported exponent value in __arm_smmu_tlb_inv_range() was >= 64. __arm_smmu_tlb_inv_range() uses the domain's pgsize_bitmap to compute the number of pages to invalidate and the invalidation range. Currently arm_smmu_sva_domain_alloc() does not setup the iommu domain's pgsize_bitmap. This leads to __ffs() on the value returning 64 and that leads to undefined behaviour w.r.t. shift operations Fix this by initializing the iommu_domain's pgsize_bitmap to PAGE_SIZE. Effectively the code needs to use the smallest page size for invalidation Cc: stable@vger.kernel.org Fixes: eb6c97647be2 ("iommu/arm-smmu-v3: Avoid constructing invalid range commands") Suggested-by: Jason Gunthorpe Signed-off-by: Balbir Singh Cc: Jean-Philippe Brucker Cc: Will Deacon Cc: Robin Murphy Cc: Joerg Roedel Cc: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250412002354.3071449-1-balbirs@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 9ba596430e7c..980cc6b33c43 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -411,6 +411,12 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, return ERR_CAST(smmu_domain); smmu_domain->domain.type = IOMMU_DOMAIN_SVA; smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; + + /* + * Choose page_size as the leaf page size for invalidation when + * ARM_SMMU_FEAT_RANGE_INV is present + */ + smmu_domain->domain.pgsize_bitmap = PAGE_SIZE; smmu_domain->smmu = smmu; ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, From b00d24997a11c10d3e420614f0873b83ce358a34 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 15 Apr 2025 11:56:20 -0700 Subject: [PATCH 058/559] iommu/arm-smmu-v3: Fix iommu_device_probe bug due to duplicated stream ids ASPEED VGA card has two built-in devices: 0008:06:00.0 PCI bridge: ASPEED Technology, Inc. AST1150 PCI-to-PCI Bridge (rev 06) 0008:07:00.0 VGA compatible controller: ASPEED Technology, Inc. ASPEED Graphics Family (rev 52) Its toplogy looks like this: +-[0008:00]---00.0-[01-09]--+-00.0-[02-09]--+-00.0-[03]----00.0 Sandisk Corp Device 5017 | +-01.0-[04]-- | +-02.0-[05]----00.0 NVIDIA Corporation Device | +-03.0-[06-07]----00.0-[07]----00.0 ASPEED Technology, Inc. ASPEED Graphics Family | +-04.0-[08]----00.0 Renesas Technology Corp. uPD720201 USB 3.0 Host Controller | \-05.0-[09]----00.0 Realtek Semiconductor Co., Ltd. RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller \-00.1 PMC-Sierra Inc. Device 4028 The IORT logic populaties two identical IDs into the fwspec->ids array via DMA aliasing in iort_pci_iommu_init() called by pci_for_each_dma_alias(). Though the SMMU driver had been able to handle this situation since commit 563b5cbe334e ("iommu/arm-smmu-v3: Cope with duplicated Stream IDs"), that got broken by the later commit cdf315f907d4 ("iommu/arm-smmu-v3: Maintain a SID->device structure"), which ended up with allocating separate streams with the same stuffing. On a kernel prior to v6.15-rc1, there has been an overlooked warning: pci 0008:07:00.0: vgaarb: setting as boot VGA device pci 0008:07:00.0: vgaarb: bridge control possible pci 0008:07:00.0: vgaarb: VGA device added: decodes=io+mem,owns=none,locks=none pcieport 0008:06:00.0: Adding to iommu group 14 ast 0008:07:00.0: stream 67328 already in tree <===== WARNING ast 0008:07:00.0: enabling device (0002 -> 0003) ast 0008:07:00.0: Using default configuration ast 0008:07:00.0: AST 2600 detected ast 0008:07:00.0: [drm] Using analog VGA ast 0008:07:00.0: [drm] dram MCLK=396 Mhz type=1 bus_width=16 [drm] Initialized ast 0.1.0 for 0008:07:00.0 on minor 0 ast 0008:07:00.0: [drm] fb0: astdrmfb frame buffer device With v6.15-rc, since the commit bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path"), the error returned with the warning is moved to the SMMU device probe flow: arm_smmu_probe_device+0x15c/0x4c0 __iommu_probe_device+0x150/0x4f8 probe_iommu_group+0x44/0x80 bus_for_each_dev+0x7c/0x100 bus_iommu_probe+0x48/0x1a8 iommu_device_register+0xb8/0x178 arm_smmu_device_probe+0x1350/0x1db0 which then fails the entire SMMU driver probe: pci 0008:06:00.0: Adding to iommu group 21 pci 0008:07:00.0: stream 67328 already in tree arm-smmu-v3 arm-smmu-v3.9.auto: Failed to register iommu arm-smmu-v3 arm-smmu-v3.9.auto: probe with driver arm-smmu-v3 failed with error -22 Since SMMU driver had been already expecting a potential duplicated Stream ID in arm_smmu_install_ste_for_dev(), change the arm_smmu_insert_master() routine to ignore a duplicated ID from the fwspec->sids array as well. Note: this has been failing the iommu_device_probe() since 2021, although a recent iommu commit in v6.15-rc1 that moves iommu_device_probe() started to fail the SMMU driver probe. Since nobody has cared about DMA Alias support, leave that as it was but fix the fundamental iommu_device_probe() breakage. Fixes: cdf315f907d4 ("iommu/arm-smmu-v3: Maintain a SID->device structure") Cc: stable@vger.kernel.org Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20250415185620.504299-1-nicolinc@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 5467f85dd463..0826b6bdf327 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3388,6 +3388,7 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, mutex_lock(&smmu->streams_mutex); for (i = 0; i < fwspec->num_ids; i++) { struct arm_smmu_stream *new_stream = &master->streams[i]; + struct rb_node *existing; u32 sid = fwspec->ids[i]; new_stream->id = sid; @@ -3398,10 +3399,20 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, break; /* Insert into SID tree */ - if (rb_find_add(&new_stream->node, &smmu->streams, - arm_smmu_streams_cmp_node)) { - dev_warn(master->dev, "stream %u already in tree\n", - sid); + existing = rb_find_add(&new_stream->node, &smmu->streams, + arm_smmu_streams_cmp_node); + if (existing) { + struct arm_smmu_master *existing_master = + rb_entry(existing, struct arm_smmu_stream, node) + ->master; + + /* Bridged PCI devices may end up with duplicated IDs */ + if (existing_master == master) + continue; + + dev_warn(master->dev, + "stream %u already in tree from dev %s\n", sid, + dev_name(existing_master->dev)); ret = -EINVAL; break; } From 2d00c34d665bc23f5200962dbc4ac1919317036c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 11 Apr 2025 15:09:14 +0100 Subject: [PATCH 059/559] iommu/arm-smmu-v3: Fail aliasing StreamIDs more gracefully We've never supported StreamID aliasing between devices, and as such they will never have had functioning DMA, but this is not fatal to the SMMU itself. Although aliasing between hard-wired platform device StreamIDs would tend to raise questions about the whole system, in practice it's far more likely to occur relatively innocently due to legacy PCI bridges, where the underlying StreamID mappings are still perfectly reasonable. As such, return a more benign -ENODEV when failing probe for such an unsupported device (and log a more obvious error message), so that it doesn't break the entire SMMU probe now that bus_iommu_probe() runs in the right order and can propagate that error back. The end result is still that the device doesn't get an IOMMU group and probably won't work, same as before. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/39d54e49c8476efc4653e352150d44b185d6d50f.1744380554.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 0826b6bdf327..48d910399a1b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3411,9 +3411,9 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, continue; dev_warn(master->dev, - "stream %u already in tree from dev %s\n", sid, - dev_name(existing_master->dev)); - ret = -EINVAL; + "Aliasing StreamID 0x%x (from %s) unsupported, expect DMA to be broken\n", + sid, dev_name(existing_master->dev)); + ret = -ENODEV; break; } } From 8dee308e4c01dea48fc104d37f92d5b58c50b96c Mon Sep 17 00:00:00 2001 From: Pavel Paklov Date: Tue, 25 Mar 2025 09:22:44 +0000 Subject: [PATCH 060/559] iommu/amd: Fix potential buffer overflow in parse_ivrs_acpihid There is a string parsing logic error which can lead to an overflow of hid or uid buffers. Comparing ACPIID_LEN against a total string length doesn't take into account the lengths of individual hid and uid buffers so the check is insufficient in some cases. For example if the length of hid string is 4 and the length of the uid string is 260, the length of str will be equal to ACPIID_LEN + 1 but uid string will overflow uid buffer which size is 256. The same applies to the hid string with length 13 and uid string with length 250. Check the length of hid and uid strings separately to prevent buffer overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter") Cc: stable@vger.kernel.org Signed-off-by: Pavel Paklov Link: https://lore.kernel.org/r/20250325092259.392844-1-Pavel.Paklov@cyberprotect.ru Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index dd9e26b7b718..14aa0d77df26 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3664,6 +3664,14 @@ found: while (*uid == '0' && *(uid + 1)) uid++; + if (strlen(hid) >= ACPIHID_HID_LEN) { + pr_err("Invalid command line: hid is too long\n"); + return 1; + } else if (strlen(uid) >= ACPIHID_UID_LEN) { + pr_err("Invalid command line: uid is too long\n"); + return 1; + } + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); From 30a3f2f3e4bd6335b727c83c08a982d969752bc1 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 14 Apr 2025 12:16:35 -0700 Subject: [PATCH 061/559] iommu: Fix two issues in iommu_copy_struct_from_user() In the review for iommu_copy_struct_to_user() helper, Matt pointed out that a NULL pointer should be rejected prior to dereferencing it: https://lore.kernel.org/all/86881827-8E2D-461C-BDA3-FA8FD14C343C@nvidia.com And Alok pointed out a typo at the same time: https://lore.kernel.org/all/480536af-6830-43ce-a327-adbd13dc3f1d@oracle.com Since both issues were copied from iommu_copy_struct_from_user(), fix them first in the current header. Fixes: e9d36c07bb78 ("iommu: Add iommu_copy_struct_from_user helper") Cc: stable@vger.kernel.org Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Acked-by: Alok Tiwari Reviewed-by: Matthew R. Ochs Link: https://lore.kernel.org/r/20250414191635.450472-1-nicolinc@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ccce8a751e2a..3a8d35d41fda 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -440,10 +440,10 @@ static inline int __iommu_copy_struct_from_user( void *dst_data, const struct iommu_user_data *src_data, unsigned int data_type, size_t data_len, size_t min_len) { - if (src_data->type != data_type) - return -EINVAL; if (WARN_ON(!dst_data || !src_data)) return -EINVAL; + if (src_data->type != data_type) + return -EINVAL; if (src_data->len < min_len || data_len < src_data->len) return -EINVAL; return copy_struct_from_user(dst_data, data_len, src_data->uptr, @@ -456,8 +456,8 @@ static inline int __iommu_copy_struct_from_user( * include/uapi/linux/iommufd.h * @user_data: Pointer to a struct iommu_user_data for user space data info * @data_type: The data type of the @kdst. Must match with @user_data->type - * @min_last: The last memember of the data structure @kdst points in the - * initial version. + * @min_last: The last member of the data structure @kdst points in the initial + * version. * Return 0 for success, otherwise -error. */ #define iommu_copy_struct_from_user(kdst, user_data, data_type, min_last) \ From 4f1492efb495bcef34c9ee8a94af81e6cea5abf4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Apr 2025 15:36:08 +0800 Subject: [PATCH 062/559] iommu/vt-d: Revert ATS timing change to fix boot failure Commit <5518f239aff1> ("iommu/vt-d: Move scalable mode ATS enablement to probe path") changed the PCI ATS enablement logic to run earlier, specifically before the default domain attachment. On some client platforms, this change resulted in boot failures, causing the kernel to panic with the following message and call trace: Kernel panic - not syncing: DMAR hardware is malfunctioning CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.14.0-rc3+ #175 Call Trace: dump_stack_lvl+0x6f/0xb0 dump_stack+0x10/0x16 panic+0x10a/0x2b7 iommu_enable_translation.cold+0xc/0xc intel_iommu_init+0xe39/0xec0 ? trace_hardirqs_on+0x1e/0xd0 ? __pfx_pci_iommu_init+0x10/0x10 pci_iommu_init+0xd/0x40 do_one_initcall+0x5b/0x390 kernel_init_freeable+0x26d/0x2b0 ? __pfx_kernel_init+0x10/0x10 kernel_init+0x15/0x120 ret_from_fork+0x35/0x60 ? __pfx_kernel_init+0x10/0x10 ret_from_fork_asm+0x1a/0x30 RIP: 1f0f:0x0 Code: Unable to access opcode bytes at 0xffffffffffffffd6. RSP: 0000:0000000000000000 EFLAGS: 841f0f2e66 ORIG_RAX: 1f0f2e6600000000 RAX: 0000000000000000 RBX: 1f0f2e6600000000 RCX: 2e66000000000084 RDX: 0000000000841f0f RSI: 000000841f0f2e66 RDI: 00841f0f2e660000 RBP: 00841f0f2e660000 R08: 00841f0f2e660000 R09: 000000841f0f2e66 R10: 0000000000841f0f R11: 2e66000000000084 R12: 000000841f0f2e66 R13: 0000000000841f0f R14: 2e66000000000084 R15: 1f0f2e6600000000 ---[ end Kernel panic - not syncing: DMAR hardware is malfunctioning ]--- Fix this by reverting the timing change for ATS enablement introduced by the offending commit and restoring the previous behavior. Fixes: 5518f239aff1 ("iommu/vt-d: Move scalable mode ATS enablement to probe path") Reported-by: Jarkko Nikula Closes: https://lore.kernel.org/linux-iommu/01b9c72f-460d-4f77-b696-54c6825babc9@linux.intel.com/ Signed-off-by: Lu Baolu Tested-by: Jarkko Nikula Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20250416073608.1799578-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b29da2d96d0b..e60b699e7b8c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3785,20 +3785,6 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) intel_iommu_debugfs_create_dev(info); - /* - * The PCIe spec, in its wisdom, declares that the behaviour of the - * device is undefined if you enable PASID support after ATS support. - * So always enable PASID support on devices which have it, even if - * we can't yet know if we're ever going to use it. - */ - if (info->pasid_supported && - !pci_enable_pasid(pdev, info->pasid_supported & ~1)) - info->pasid_enabled = 1; - - if (sm_supported(iommu)) - iommu_enable_pci_ats(info); - iommu_enable_pci_pri(info); - return &iommu->iommu; free_table: intel_pasid_free_table(dev); @@ -3810,6 +3796,26 @@ free: return ERR_PTR(ret); } +static void intel_iommu_probe_finalize(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + + /* + * The PCIe spec, in its wisdom, declares that the behaviour of the + * device is undefined if you enable PASID support after ATS support. + * So always enable PASID support on devices which have it, even if + * we can't yet know if we're ever going to use it. + */ + if (info->pasid_supported && + !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) + info->pasid_enabled = 1; + + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) + iommu_enable_pci_ats(info); + iommu_enable_pci_pri(info); +} + static void intel_iommu_release_device(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); @@ -4391,6 +4397,7 @@ const struct iommu_ops intel_iommu_ops = { .domain_alloc_sva = intel_svm_domain_alloc, .domain_alloc_nested = intel_iommu_domain_alloc_nested, .probe_device = intel_iommu_probe_device, + .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .device_group = intel_iommu_device_group, From 1d2d8524eaffc4d9a116213520d2c650e07c9cc6 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 17 Apr 2025 11:52:39 -0500 Subject: [PATCH 063/559] iio: imu: inv_mpu6050: align buffer for timestamp Align the buffer used with iio_push_to_buffers_with_timestamp() to ensure the s64 timestamp is aligned to 8 bytes. Fixes: 0829edc43e0a ("iio: imu: inv_mpu6050: read the full fifo when processing data") Signed-off-by: David Lechner Link: https://patch.msgid.link/20250417-iio-more-timestamp-alignment-v1-7-eafac1e22318@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c index 3d3b27f28c9d..273196e647a2 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c @@ -50,7 +50,7 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p) u16 fifo_count; u32 fifo_period; s64 timestamp; - u8 data[INV_MPU6050_OUTPUT_DATA_SIZE]; + u8 data[INV_MPU6050_OUTPUT_DATA_SIZE] __aligned(8); size_t i, nb; mutex_lock(&st->lock); From bb49d940344bcb8e2b19e69d7ac86f567887ea9a Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 17 Apr 2025 11:52:37 -0500 Subject: [PATCH 064/559] iio: chemical: sps30: use aligned_s64 for timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow the pattern of other drivers and use aligned_s64 for the timestamp. This will ensure that the timestamp is correctly aligned on all architectures. Fixes: a5bf6fdd19c3 ("iio:chemical:sps30: Fix timestamp alignment") Signed-off-by: David Lechner Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250417-iio-more-timestamp-alignment-v1-5-eafac1e22318@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/sps30.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/chemical/sps30.c b/drivers/iio/chemical/sps30.c index 6f4f2ba2c09d..a7888146188d 100644 --- a/drivers/iio/chemical/sps30.c +++ b/drivers/iio/chemical/sps30.c @@ -108,7 +108,7 @@ static irqreturn_t sps30_trigger_handler(int irq, void *p) int ret; struct { s32 data[4]; /* PM1, PM2P5, PM4, PM10 */ - s64 ts; + aligned_s64 ts; } scan; mutex_lock(&state->lock); From 6ffa698674053e82e811520642db2650d00d2c01 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 17 Apr 2025 11:52:36 -0500 Subject: [PATCH 065/559] iio: chemical: pms7003: use aligned_s64 for timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow the pattern of other drivers and use aligned_s64 for the timestamp. This will ensure that the timestamp is correctly aligned on all architectures. Also move the unaligned.h header while touching this since it was the only one not in alphabetical order. Fixes: 13e945631c2f ("iio:chemical:pms7003: Fix timestamp alignment and prevent data leak.") Signed-off-by: David Lechner Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250417-iio-more-timestamp-alignment-v1-4-eafac1e22318@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/pms7003.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/chemical/pms7003.c b/drivers/iio/chemical/pms7003.c index d0bd94912e0a..e05ce1f12065 100644 --- a/drivers/iio/chemical/pms7003.c +++ b/drivers/iio/chemical/pms7003.c @@ -5,7 +5,6 @@ * Copyright (c) Tomasz Duszynski */ -#include #include #include #include @@ -19,6 +18,8 @@ #include #include #include +#include +#include #define PMS7003_DRIVER_NAME "pms7003" @@ -76,7 +77,7 @@ struct pms7003_state { /* Used to construct scan to push to the IIO buffer */ struct { u16 data[3]; /* PM1, PM2P5, PM10 */ - s64 ts; + aligned_s64 ts; } scan; }; From f79aeb6c631b57395f37acbfbe59727e355a714c Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Apr 2025 11:34:36 +0100 Subject: [PATCH 066/559] iio: temp: maxim-thermocouple: Fix potential lack of DMA safe buffer. The trick of using __aligned(IIO_DMA_MINALIGN) ensures that there is no overlap between buffers used for DMA and those used for driver state storage that are before the marking. It doesn't ensure anything above state variables found after the marking. Hence move this particular bit of state earlier in the structure. Fixes: 10897f34309b ("iio: temp: maxim_thermocouple: Fix alignment for DMA safety") Reviewed-by: David Lechner Link: https://patch.msgid.link/20250413103443.2420727-14-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/maxim_thermocouple.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/temperature/maxim_thermocouple.c b/drivers/iio/temperature/maxim_thermocouple.c index c28a7a6dea5f..555a61e2f3fd 100644 --- a/drivers/iio/temperature/maxim_thermocouple.c +++ b/drivers/iio/temperature/maxim_thermocouple.c @@ -121,9 +121,9 @@ static const struct maxim_thermocouple_chip maxim_thermocouple_chips[] = { struct maxim_thermocouple_data { struct spi_device *spi; const struct maxim_thermocouple_chip *chip; + char tc_type; u8 buffer[16] __aligned(IIO_DMA_MINALIGN); - char tc_type; }; static int maxim_thermocouple_read(struct maxim_thermocouple_data *data, From 1bb942287e05dc4c304a003ea85e6dd9a5e7db39 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Apr 2025 11:34:27 +0100 Subject: [PATCH 067/559] iio: accel: adxl355: Make timestamp 64-bit aligned using aligned_s64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The IIO ABI requires 64-bit aligned timestamps. In this case insufficient padding would have been added on architectures where an s64 is only 32-bit aligned. Use aligned_s64 to enforce the correct alignment. Fixes: 327a0eaf19d5 ("iio: accel: adxl355: Add triggered buffer support") Reported-by: David Lechner Reviewed-by: Nuno Sá Reviewed-by: David Lechner Link: https://patch.msgid.link/20250413103443.2420727-5-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl355_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/adxl355_core.c b/drivers/iio/accel/adxl355_core.c index e8cd21fa77a6..cbac622ef821 100644 --- a/drivers/iio/accel/adxl355_core.c +++ b/drivers/iio/accel/adxl355_core.c @@ -231,7 +231,7 @@ struct adxl355_data { u8 transf_buf[3]; struct { u8 buf[14]; - s64 ts; + aligned_s64 ts; } buffer; } __aligned(IIO_DMA_MINALIGN); }; From 5097eaae98e53f9ab9d35801c70da819b92ca907 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Apr 2025 11:34:26 +0100 Subject: [PATCH 068/559] iio: adc: dln2: Use aligned_s64 for timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here the lack of marking allows the overall structure to not be sufficiently aligned resulting in misplacement of the timestamp in iio_push_to_buffers_with_timestamp(). Use aligned_s64 to force the alignment on all architectures. Fixes: 7c0299e879dd ("iio: adc: Add support for DLN2 ADC") Reported-by: David Lechner Reviewed-by: Andy Shevchenko Reviewed-by: Nuno Sá Reviewed-by: David Lechner Link: https://patch.msgid.link/20250413103443.2420727-4-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/dln2-adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/dln2-adc.c b/drivers/iio/adc/dln2-adc.c index a1e48a756a7b..359e26e3f5bc 100644 --- a/drivers/iio/adc/dln2-adc.c +++ b/drivers/iio/adc/dln2-adc.c @@ -466,7 +466,7 @@ static irqreturn_t dln2_adc_trigger_h(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct { __le16 values[DLN2_ADC_MAX_CHANNELS]; - int64_t timestamp_space; + aligned_s64 timestamp_space; } data; struct dln2_adc_get_all_vals dev_data; struct dln2_adc *dln2 = iio_priv(indio_dev); From ffbc26bc91c1f1eb3dcf5d8776e74cbae21ee13a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Apr 2025 11:34:25 +0100 Subject: [PATCH 069/559] iio: adc: ad7768-1: Fix insufficient alignment of timestamp. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On architectures where an s64 is not 64-bit aligned, this may result insufficient alignment of the timestamp and the structure being too small. Use aligned_s64 to force the alignment. Fixes: a1caeebab07e ("iio: adc: ad7768-1: Fix too small buffer passed to iio_push_to_buffers_with_timestamp()") # aligned_s64 newer Reported-by: David Lechner Reviewed-by: Nuno Sá Reviewed-by: David Lechner Link: https://patch.msgid.link/20250413103443.2420727-3-jic23@kernel.org Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7768-1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index 5a863005aca6..5e0be36af0c5 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -168,7 +168,7 @@ struct ad7768_state { union { struct { __be32 chan; - s64 timestamp; + aligned_s64 timestamp; } scan; __be32 d32; u8 d8[2]; From 52d349884738c346961e153f195f4c7fe186fcf4 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Apr 2025 11:34:24 +0100 Subject: [PATCH 070/559] iio: adc: ad7266: Fix potential timestamp alignment issue. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On architectures where an s64 is only 32-bit aligned insufficient padding would be left between the earlier elements and the timestamp. Use aligned_s64 to enforce the correct placement and ensure the storage is large enough. Fixes: 54e018da3141 ("iio:ad7266: Mark transfer buffer as __be16") # aligned_s64 is much newer. Reported-by: David Lechner Reviewed-by: Nuno Sá Reviewed-by: David Lechner Link: https://patch.msgid.link/20250413103443.2420727-2-jic23@kernel.org Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7266.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index 18559757f908..7fef2727f89e 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -45,7 +45,7 @@ struct ad7266_state { */ struct { __be16 sample[2]; - s64 timestamp; + aligned_s64 timestamp; } data __aligned(IIO_DMA_MINALIGN); }; From 2e922956277187655ed9bedf7b5c28906e51708f Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Mon, 14 Apr 2025 11:40:49 -0400 Subject: [PATCH 071/559] staging: iio: adc: ad7816: Correct conditional logic for store mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mode setting logic in ad7816_store_mode was reversed due to incorrect handling of the strcmp return value. strcmp returns 0 on match, so the `if (strcmp(buf, "full"))` block executed when the input was not "full". This resulted in "full" setting the mode to AD7816_PD (power-down) and other inputs setting it to AD7816_FULL. Fix this by checking it against 0 to correctly check for "full" and "power-down", mapping them to AD7816_FULL and AD7816_PD respectively. Fixes: 7924425db04a ("staging: iio: adc: new driver for AD7816 devices") Cc: stable@vger.kernel.org Signed-off-by: Gabriel Shahrouzi Acked-by: Nuno Sá Link: https://lore.kernel.org/stable/20250414152920.467505-1-gshahrouzi%40gmail.com Link: https://patch.msgid.link/20250414154050.469482-1-gshahrouzi@gmail.com Signed-off-by: Jonathan Cameron --- drivers/staging/iio/adc/ad7816.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/adc/ad7816.c b/drivers/staging/iio/adc/ad7816.c index 6c14d7bcdd67..081b17f49863 100644 --- a/drivers/staging/iio/adc/ad7816.c +++ b/drivers/staging/iio/adc/ad7816.c @@ -136,7 +136,7 @@ static ssize_t ad7816_store_mode(struct device *dev, struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct ad7816_chip_info *chip = iio_priv(indio_dev); - if (strcmp(buf, "full")) { + if (strcmp(buf, "full") == 0) { gpiod_set_value(chip->rdwr_pin, 1); chip->mode = AD7816_FULL; } else { From 936a25ef11f5d6c3e3e6736bb8b28e28dfb77918 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Apr 2025 10:45:17 -0700 Subject: [PATCH 072/559] input/joystick: magellan: Mark __nonstring look-up table GCC 15's new -Wunterminated-string-initialization notices that the 16 character lookup table "nibbles" (which is not used as a C-String) needs to be marked as "nonstring": drivers/input/joystick/magellan.c: In function 'magellan_crunch_nibbles': drivers/input/joystick/magellan.c:51:44: warning: initializer-string for array of 'unsigned char' truncates NUL terminator but destination lacks 'nonstring' attribute (17 chars into 16 available) [-Wunterminated-string-initialization] 51 | static unsigned char nibbles[16] = "0AB3D56GH9:K Link: https://lore.kernel.org/r/20250416174513.work.662-kees@kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/magellan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/joystick/magellan.c b/drivers/input/joystick/magellan.c index 2eaa25c9c68c..7622638e5bb8 100644 --- a/drivers/input/joystick/magellan.c +++ b/drivers/input/joystick/magellan.c @@ -48,7 +48,7 @@ struct magellan { static int magellan_crunch_nibbles(unsigned char *data, int count) { - static unsigned char nibbles[16] = "0AB3D56GH9:K Date: Fri, 18 Apr 2025 18:37:18 -0700 Subject: [PATCH 073/559] Input: sparcspkr - avoid unannotated fall-through Fix follow warnings with clang-21i (and reformat for clarity): drivers/input/misc/sparcspkr.c:78:3: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] 78 | case SND_TONE: break; | ^ drivers/input/misc/sparcspkr.c:78:3: note: insert 'break;' to avoid fall-through 78 | case SND_TONE: break; | ^ | break; drivers/input/misc/sparcspkr.c:113:3: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] 113 | case SND_TONE: break; | ^ drivers/input/misc/sparcspkr.c:113:3: note: insert 'break;' to avoid fall-through 113 | case SND_TONE: break; | ^ | break; 2 warnings generated. Signed-off-by: WangYuli Link: https://lore.kernel.org/r/6730E40353C76908+20250415052439.155051-1-wangyuli@uniontech.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/sparcspkr.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c index 8d7303fc13bc..1cfadd73829f 100644 --- a/drivers/input/misc/sparcspkr.c +++ b/drivers/input/misc/sparcspkr.c @@ -74,9 +74,14 @@ static int bbc_spkr_event(struct input_dev *dev, unsigned int type, unsigned int return -1; switch (code) { - case SND_BELL: if (value) value = 1000; - case SND_TONE: break; - default: return -1; + case SND_BELL: + if (value) + value = 1000; + break; + case SND_TONE: + break; + default: + return -1; } if (value > 20 && value < 32767) @@ -109,9 +114,14 @@ static int grover_spkr_event(struct input_dev *dev, unsigned int type, unsigned return -1; switch (code) { - case SND_BELL: if (value) value = 1000; - case SND_TONE: break; - default: return -1; + case SND_BELL: + if (value) + value = 1000; + break; + case SND_TONE: + break; + default: + return -1; } if (value > 20 && value < 32767) From c6cb8bf79466ae66bd0d07338c7c505ce758e9d7 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Thu, 10 Apr 2025 14:46:32 -0400 Subject: [PATCH 074/559] Input: cyttsp5 - ensure minimum reset pulse width The current reset pulse width is measured to be 5us on a Renesas RZ/G2L SOM. The manufacturer's minimum reset pulse width is specified as 10us. Extend reset pulse width to make sure it is long enough on all platforms. Also reword confusing comments about reset pin assertion. Fixes: 5b0c03e24a06 ("Input: Add driver for Cypress Generation 5 touchscreen") Cc: stable@vger.kernel.org Acked-by: Alistair Francis Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20250410184633.1164837-1-hugo@hugovil.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/cyttsp5.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/cyttsp5.c b/drivers/input/touchscreen/cyttsp5.c index eafe5a9b8964..14c43f0a6c21 100644 --- a/drivers/input/touchscreen/cyttsp5.c +++ b/drivers/input/touchscreen/cyttsp5.c @@ -870,13 +870,16 @@ static int cyttsp5_probe(struct device *dev, struct regmap *regmap, int irq, ts->input->phys = ts->phys; input_set_drvdata(ts->input, ts); - /* Reset the gpio to be in a reset state */ + /* Assert gpio to be in a reset state */ ts->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(ts->reset_gpio)) { error = PTR_ERR(ts->reset_gpio); dev_err(dev, "Failed to request reset gpio, error %d\n", error); return error; } + + fsleep(10); /* Ensure long-enough reset pulse (minimum 10us). */ + gpiod_set_value_cansleep(ts->reset_gpio, 0); /* Need a delay to have device up */ From 494d0939b1bda4d4ddca7d52a6ce6f808ff2c9a5 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 1 Apr 2025 15:04:02 +0800 Subject: [PATCH 075/559] ALSA: hda/realtek - Enable speaker for HP platform The speaker doesn't mute when plugged headphone. This platform support 4ch speakers. The speaker pin 0x14 wasn't fill verb table. After assigned model ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX. The speaker can mute when headphone was plugged. Fixes: aa8e3ef4fe53 ("ALSA: hda/realtek: Add quirks for various HP ENVY models") Signed-off-by: Kailang Yang Link: https://lore.kernel.org/eb4c14a4d85740069c909e756bbacb0e@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 877137cb09ac..8ed613932c5b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -441,6 +441,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000); fallthrough; case 0x10ec0215: + case 0x10ec0236: + case 0x10ec0245: + case 0x10ec0256: + case 0x10ec0257: case 0x10ec0285: case 0x10ec0289: alc_update_coef_idx(codec, 0x36, 1<<13, 0); @@ -448,12 +452,8 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0233: case 0x10ec0235: - case 0x10ec0236: - case 0x10ec0245: case 0x10ec0255: - case 0x10ec0256: case 0x19e58326: - case 0x10ec0257: case 0x10ec0282: case 0x10ec0283: case 0x10ec0286: @@ -10768,8 +10768,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8caf, "HP Elite mt645 G8 Mobile Thin Client", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8cbd, "HP Pavilion Aero Laptop 13-bg0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS), - SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x103c, 0x8cde, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), + SND_PCI_QUIRK(0x103c, 0x8cde, "HP OmniBook Ultra Flip Laptop 14t", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), From f406005e162b660dc405b4f18bf7bcb93a515608 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Thu, 17 Apr 2025 04:19:23 +0930 Subject: [PATCH 076/559] ALSA: usb-audio: Add retry on -EPROTO from usb_set_interface() During initialisation of Focusrite USB audio interfaces, -EPROTO is sometimes returned from usb_set_interface(), which sometimes prevents the device from working: subsequent usb_set_interface() and uac_clock_source_is_valid() calls fail. This patch adds up to 5 retries in endpoint_set_interface(), with a delay starting at 5ms and doubling each time. 5 retries was chosen to allow for longer than expected waits for the interface to start responding correctly; in testing, a single 5ms delay was sufficient to fix the issue. Closes: https://github.com/geoffreybennett/fcp-support/issues/2 Cc: stable@vger.kernel.org Signed-off-by: Geoffrey D. Bennett Link: https://patch.msgid.link/Z//7s9dKsmVxHzY2@m.b4.vu Signed-off-by: Takashi Iwai --- sound/usb/endpoint.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index a29f28eb7d0c..f36ec98da460 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -926,6 +926,8 @@ static int endpoint_set_interface(struct snd_usb_audio *chip, { int altset = set ? ep->altsetting : 0; int err; + int retries = 0; + const int max_retries = 5; if (ep->iface_ref->altset == altset) return 0; @@ -935,8 +937,13 @@ static int endpoint_set_interface(struct snd_usb_audio *chip, usb_audio_dbg(chip, "Setting usb interface %d:%d for EP 0x%x\n", ep->iface, altset, ep->ep_num); +retry: err = usb_set_interface(chip->dev, ep->iface, altset); if (err < 0) { + if (err == -EPROTO && ++retries <= max_retries) { + msleep(5 * (1 << (retries - 1))); + goto retry; + } usb_audio_err_ratelimited( chip, "%d:%d: usb_set_interface failed (%d)\n", ep->iface, altset, err); From e4570f4bb231f01e32d44fd38841665f340d6914 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 18 Apr 2025 11:17:13 -0500 Subject: [PATCH 077/559] iio: imu: adis16550: align buffers for timestamp Align the buffers used with iio_push_to_buffers_with_timestamp() to ensure the s64 timestamp is aligned to 8 bytes. Fixes: bac4368fab62 ("iio: imu: adis16550: add adis16550 support") Signed-off-by: David Lechner Link: https://patch.msgid.link/20250418-iio-more-timestamp-alignment-v2-1-d6a5d2b1c9fe@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis16550.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis16550.c b/drivers/iio/imu/adis16550.c index b14ea8937c7f..28f0dbd0226c 100644 --- a/drivers/iio/imu/adis16550.c +++ b/drivers/iio/imu/adis16550.c @@ -836,7 +836,7 @@ static irqreturn_t adis16550_trigger_handler(int irq, void *p) u16 dummy; bool valid; struct iio_poll_func *pf = p; - __be32 data[ADIS16550_MAX_SCAN_DATA]; + __be32 data[ADIS16550_MAX_SCAN_DATA] __aligned(8); struct iio_dev *indio_dev = pf->indio_dev; struct adis16550 *st = iio_priv(indio_dev); struct adis *adis = iio_device_get_drvdata(indio_dev); From ffcd19e9f4cca0c8f9e23e88f968711acefbb37b Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 18 Apr 2025 11:17:14 -0500 Subject: [PATCH 078/559] iio: pressure: mprls0025pa: use aligned_s64 for timestamp Follow the pattern of other drivers and use aligned_s64 for the timestamp. This will ensure the struct itself it also 8-byte aligned. While touching this, convert struct mpr_chan to an anonymous struct to consolidate the code a bit to make it easier for future readers. Fixes: 713337d9143e ("iio: pressure: Honeywell mprls0025pa pressure sensor") Signed-off-by: David Lechner Link: https://patch.msgid.link/20250418-iio-more-timestamp-alignment-v2-2-d6a5d2b1c9fe@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/mprls0025pa.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/iio/pressure/mprls0025pa.h b/drivers/iio/pressure/mprls0025pa.h index 9d5c30afa9d6..d62a018eaff3 100644 --- a/drivers/iio/pressure/mprls0025pa.h +++ b/drivers/iio/pressure/mprls0025pa.h @@ -34,16 +34,6 @@ struct iio_dev; struct mpr_data; struct mpr_ops; -/** - * struct mpr_chan - * @pres: pressure value - * @ts: timestamp - */ -struct mpr_chan { - s32 pres; - s64 ts; -}; - enum mpr_func_id { MPR_FUNCTION_A, MPR_FUNCTION_B, @@ -69,6 +59,8 @@ enum mpr_func_id { * reading in a loop until data is ready * @completion: handshake from irq to read * @chan: channel values for buffered mode + * @chan.pres: pressure value + * @chan.ts: timestamp * @buffer: raw conversion data */ struct mpr_data { @@ -87,7 +79,10 @@ struct mpr_data { struct gpio_desc *gpiod_reset; int irq; struct completion completion; - struct mpr_chan chan; + struct { + s32 pres; + aligned_s64 ts; + } chan; u8 buffer[MPR_MEASUREMENT_RD_SIZE] __aligned(IIO_DMA_MINALIGN); }; From f083f8a21cc785ebe3a33f756a3fa3660611f8db Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Fri, 18 Apr 2025 20:37:53 +0200 Subject: [PATCH 079/559] iio: adc: ad7606: fix serial register access Fix register read/write routine as per datasheet. When reading multiple consecutive registers, only the first one is read properly. This is due to missing chip select deassert and assert again between first and second 16bit transfer, as shown in the datasheet AD7606C-16, rev 0, figure 110. Fixes: f2a22e1e172f ("iio: adc: ad7606: Add support for software mode for ad7616") Reviewed-by: David Lechner Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250418-wip-bl-ad7606-fix-reg-access-v3-1-d5eeb440c738@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7606_spi.c b/drivers/iio/adc/ad7606_spi.c index 885bf0b68e77..179115e90988 100644 --- a/drivers/iio/adc/ad7606_spi.c +++ b/drivers/iio/adc/ad7606_spi.c @@ -131,7 +131,7 @@ static int ad7606_spi_reg_read(struct ad7606_state *st, unsigned int addr) { .tx_buf = &st->d16[0], .len = 2, - .cs_change = 0, + .cs_change = 1, }, { .rx_buf = &st->d16[1], .len = 2, From 609bc31eca06c7408e6860d8b46311ebe45c1fef Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Mon, 21 Apr 2025 09:15:39 -0400 Subject: [PATCH 080/559] iio: adis16201: Correct inclinometer channel resolution The inclinometer channels were previously defined with 14 realbits. However, the ADIS16201 datasheet states the resolution for these output channels is 12 bits (Page 14, text description; Page 15, table 7). Correct the realbits value to 12 to accurately reflect the hardware. Fixes: f7fe1d1dd5a5 ("staging: iio: new adis16201 driver") Cc: stable@vger.kernel.org Signed-off-by: Gabriel Shahrouzi Reviewed-by: Marcelo Schmitt Link: https://patch.msgid.link/20250421131539.912966-1-gshahrouzi@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adis16201.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/adis16201.c b/drivers/iio/accel/adis16201.c index 8601b9a8b8e7..5127e58eebc7 100644 --- a/drivers/iio/accel/adis16201.c +++ b/drivers/iio/accel/adis16201.c @@ -211,9 +211,9 @@ static const struct iio_chan_spec adis16201_channels[] = { BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), ADIS_AUX_ADC_CHAN(ADIS16201_AUX_ADC_REG, ADIS16201_SCAN_AUX_ADC, 0, 12), ADIS_INCLI_CHAN(X, ADIS16201_XINCL_OUT_REG, ADIS16201_SCAN_INCLI_X, - BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 12), ADIS_INCLI_CHAN(Y, ADIS16201_YINCL_OUT_REG, ADIS16201_SCAN_INCLI_Y, - BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 12), IIO_CHAN_SOFT_TIMESTAMP(7) }; From 4b98bf3bff7353d94824c4d874ff2d7f38acc49a Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Tue, 11 Mar 2025 20:41:12 +0100 Subject: [PATCH 081/559] arm64: dts: imx8mp: configure GPU and NPU clocks in nominal DTSI Commit 255fbd9eabe7 ("arm64: dts: imx8mp: Add optional nominal drive mode DTSI") added imx8mp-nominal.dtsi, which overrides all overdrive clock rates in imx8mp.dtsi to the nominal rates. At the same time, commit 9f7595b3e5ae ("arm64: dts: imx8mp: configure GPU and NPU clocks to overdrive rate") went in, which changed some clock rates away from the nominal values. Resolve the discrepancy by effectively reverting the changes in the latter commit inside imx8mp-nominal.dtsi. This is required for proper operation of the imx8mp-skov boards, which are currently imx8mp-nominal.dtsi's only users and lets all other boards that don't include it benefit from the new higher frequencies. Signed-off-by: Ahmad Fatoum Signed-off-by: Shawn Guo --- .../boot/dts/freescale/imx8mp-nominal.dtsi | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi index a1b75c9068b2..dc0ccd723c6d 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi @@ -24,6 +24,20 @@ fsl,operating-mode = "nominal"; }; +&gpu2d { + assigned-clocks = <&clk IMX8MP_CLK_GPU2D_CORE>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>; +}; + +&gpu3d { + assigned-clocks = <&clk IMX8MP_CLK_GPU3D_CORE>, + <&clk IMX8MP_CLK_GPU3D_SHADER_CORE>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>, <800000000>; +}; + &pgc_hdmimix { assigned-clocks = <&clk IMX8MP_CLK_HDMI_AXI>, <&clk IMX8MP_CLK_HDMI_APB>; @@ -46,6 +60,18 @@ assigned-clock-rates = <600000000>, <300000000>; }; +&pgc_mlmix { + assigned-clocks = <&clk IMX8MP_CLK_ML_CORE>, + <&clk IMX8MP_CLK_ML_AXI>, + <&clk IMX8MP_CLK_ML_AHB>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>, + <800000000>, + <300000000>; +}; + &media_blk_ctrl { assigned-clocks = <&clk IMX8MP_CLK_MEDIA_AXI>, <&clk IMX8MP_CLK_MEDIA_APB>, From 02e4232998db357bb8199778722d81ffcff0cb98 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Fri, 14 Mar 2025 14:01:04 +0800 Subject: [PATCH 082/559] arm64: dts: imx95: Correct the range of PCIe app-reg region Correct the range of PCIe app-reg region from 0x2000 to 0x4000 refer to SerDes_SS memory map of i.MX95 Rerference Manual. Fixes: 3b1d5deb29ff ("arm64: dts: imx95: add pcie[0,1] and pcie-ep[0,1] support") Signed-off-by: Richard Zhu Reviewed-by: Frank Li Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx95.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi index 9bb26b466a06..59f057ba6fa7 100644 --- a/arch/arm64/boot/dts/freescale/imx95.dtsi +++ b/arch/arm64/boot/dts/freescale/imx95.dtsi @@ -1626,7 +1626,7 @@ reg = <0 0x4c300000 0 0x10000>, <0 0x60100000 0 0xfe00000>, <0 0x4c360000 0 0x10000>, - <0 0x4c340000 0 0x2000>; + <0 0x4c340000 0 0x4000>; reg-names = "dbi", "config", "atu", "app"; ranges = <0x81000000 0x0 0x00000000 0x0 0x6ff00000 0 0x00100000>, <0x82000000 0x0 0x10000000 0x9 0x10000000 0 0x10000000>; @@ -1673,7 +1673,7 @@ reg = <0 0x4c300000 0 0x10000>, <0 0x4c360000 0 0x1000>, <0 0x4c320000 0 0x1000>, - <0 0x4c340000 0 0x2000>, + <0 0x4c340000 0 0x4000>, <0 0x4c370000 0 0x10000>, <0x9 0 1 0>; reg-names = "dbi","atu", "dbi2", "app", "dma", "addr_space"; @@ -1700,7 +1700,7 @@ reg = <0 0x4c380000 0 0x10000>, <8 0x80100000 0 0xfe00000>, <0 0x4c3e0000 0 0x10000>, - <0 0x4c3c0000 0 0x2000>; + <0 0x4c3c0000 0 0x4000>; reg-names = "dbi", "config", "atu", "app"; ranges = <0x81000000 0 0x00000000 0x8 0x8ff00000 0 0x00100000>, <0x82000000 0 0x10000000 0xa 0x10000000 0 0x10000000>; @@ -1749,7 +1749,7 @@ reg = <0 0x4c380000 0 0x10000>, <0 0x4c3e0000 0 0x1000>, <0 0x4c3a0000 0 0x1000>, - <0 0x4c3c0000 0 0x2000>, + <0 0x4c3c0000 0 0x4000>, <0 0x4c3f0000 0 0x10000>, <0xa 0 1 0>; reg-names = "dbi", "atu", "dbi2", "app", "dma", "addr_space"; From 6e1a7bc8382b0d4208258f7d2a4474fae788dd90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Szymanski?= Date: Fri, 14 Mar 2025 17:20:38 +0100 Subject: [PATCH 083/559] ARM: dts: opos6ul: add ksz8081 phy properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup") removed a PHY fixup that setted the clock mode and the LED mode. Make the Ethernet interface work again by doing as advised in the commit's log, set clock mode and the LED mode in the device tree. Fixes: c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup") Signed-off-by: Sébastien Szymanski Reviewed-by: Oleksij Rempel Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi index f2386dcb9ff2..dda4fa91b2f2 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi @@ -40,6 +40,9 @@ reg = <1>; interrupt-parent = <&gpio4>; interrupts = <16 IRQ_TYPE_LEVEL_LOW>; + micrel,led-mode = <1>; + clocks = <&clks IMX6UL_CLK_ENET_REF>; + clock-names = "rmii-ref"; status = "okay"; }; }; From 7e21ea8149a0e41c3666ee52cc063a6f797a7a2a Mon Sep 17 00:00:00 2001 From: Chen Linxuan Date: Tue, 15 Apr 2025 12:06:16 +0300 Subject: [PATCH 084/559] drm/i915/pxp: fix undefined reference to `intel_pxp_gsccs_is_ready_for_sessions' On x86_64 with gcc version 13.3.0, I compile kernel with: make defconfig ./scripts/kconfig/merge_config.sh .config <( echo CONFIG_COMPILE_TEST=y ) make KCFLAGS="-fno-inline-functions -fno-inline-small-functions -fno-inline-functions-called-once" Then I get a linker error: ld: vmlinux.o: in function `pxp_fw_dependencies_completed': kintel_pxp.c:(.text+0x95728f): undefined reference to `intel_pxp_gsccs_is_ready_for_sessions' This is caused by not having a intel_pxp_gsccs_is_ready_for_sessions() header stub for CONFIG_DRM_I915_PXP=n. Add it. Signed-off-by: Chen Linxuan Fixes: 99afb7cc8c44 ("drm/i915/pxp: Add ARB session creation and cleanup") Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20250415090616.2649889-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit b484c1e225a6a582fc78c4d7af7b286408bb7d41) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h index 9aae779c4da3..4969d3de2bac 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h @@ -23,6 +23,7 @@ int intel_pxp_gsccs_init(struct intel_pxp *pxp); int intel_pxp_gsccs_create_session(struct intel_pxp *pxp, int arb_session_id); void intel_pxp_gsccs_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); +bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); #else static inline void intel_pxp_gsccs_fini(struct intel_pxp *pxp) @@ -34,8 +35,11 @@ static inline int intel_pxp_gsccs_init(struct intel_pxp *pxp) return 0; } +static inline bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp) +{ + return false; +} + #endif -bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); - #endif /*__INTEL_PXP_GSCCS_H__ */ From 460b14b0929fa9f658a7e159ef646ce456962ab0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 18 Apr 2025 13:27:53 +0200 Subject: [PATCH 085/559] spi: stm32-ospi: Fix an error handling path in stm32_ospi_probe() If an error occurs after a successful stm32_ospi_dma_setup() call, some dma_release_channel() calls are needed to release some resources, as already done in the remove function. Fixes: 79b8a705e26c ("spi: stm32: Add OSPI driver") Signed-off-by: Christophe JAILLET Reviewed-by: Patrice Chotard Link: https://patch.msgid.link/2674c8df1d05ab312826b69bfe9559f81d125a0b.1744975624.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- drivers/spi/spi-stm32-ospi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi-stm32-ospi.c b/drivers/spi/spi-stm32-ospi.c index 668022098b1e..9ec9823409cc 100644 --- a/drivers/spi/spi-stm32-ospi.c +++ b/drivers/spi/spi-stm32-ospi.c @@ -960,6 +960,10 @@ err_pm_resume: err_pm_enable: pm_runtime_force_suspend(ospi->dev); mutex_destroy(&ospi->lock); + if (ospi->dma_chtx) + dma_release_channel(ospi->dma_chtx); + if (ospi->dma_chrx) + dma_release_channel(ospi->dma_chrx); return ret; } From af5226abb40cae959f424f7ca614787a1c87ce48 Mon Sep 17 00:00:00 2001 From: Salah Triki Date: Wed, 16 Apr 2025 20:26:25 +0100 Subject: [PATCH 086/559] smb: server: smb2pdu: check return value of xa_store() xa_store() may fail so check its return value and return error code if error occurred. Signed-off-by: Salah Triki Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 57839f9708bb..372021b3f263 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1445,7 +1445,7 @@ static int ntlm_authenticate(struct ksmbd_work *work, { struct ksmbd_conn *conn = work->conn; struct ksmbd_session *sess = work->sess; - struct channel *chann = NULL; + struct channel *chann = NULL, *old; struct ksmbd_user *user; u64 prev_id; int sz, rc; @@ -1557,7 +1557,12 @@ binding_session: return -ENOMEM; chann->conn = conn; - xa_store(&sess->ksmbd_chann_list, (long)conn, chann, KSMBD_DEFAULT_GFP); + old = xa_store(&sess->ksmbd_chann_list, (long)conn, chann, + KSMBD_DEFAULT_GFP); + if (xa_is_err(old)) { + kfree(chann); + return xa_err(old); + } } } From a1f46c99d9ea411f9bf30025b912d881d36fc709 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 17 Apr 2025 10:10:15 +0900 Subject: [PATCH 087/559] ksmbd: fix use-after-free in ksmbd_session_rpc_open A UAF issue can occur due to a race condition between ksmbd_session_rpc_open() and __session_rpc_close(). Add rpc_lock to the session to protect it. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/mgmt/user_session.c | 20 ++++++++++++++------ fs/smb/server/mgmt/user_session.h | 1 + 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 3f45f28f6f0f..9dec4c2940bc 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -59,10 +59,12 @@ static void ksmbd_session_rpc_clear_list(struct ksmbd_session *sess) struct ksmbd_session_rpc *entry; long index; + down_write(&sess->rpc_lock); xa_for_each(&sess->rpc_handle_list, index, entry) { xa_erase(&sess->rpc_handle_list, index); __session_rpc_close(sess, entry); } + up_write(&sess->rpc_lock); xa_destroy(&sess->rpc_handle_list); } @@ -92,7 +94,7 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name) { struct ksmbd_session_rpc *entry, *old; struct ksmbd_rpc_command *resp; - int method; + int method, id; method = __rpc_method(rpc_name); if (!method) @@ -102,26 +104,29 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name) if (!entry) return -ENOMEM; + down_read(&sess->rpc_lock); entry->method = method; - entry->id = ksmbd_ipc_id_alloc(); - if (entry->id < 0) + entry->id = id = ksmbd_ipc_id_alloc(); + if (id < 0) goto free_entry; - old = xa_store(&sess->rpc_handle_list, entry->id, entry, KSMBD_DEFAULT_GFP); + old = xa_store(&sess->rpc_handle_list, id, entry, KSMBD_DEFAULT_GFP); if (xa_is_err(old)) goto free_id; - resp = ksmbd_rpc_open(sess, entry->id); + resp = ksmbd_rpc_open(sess, id); if (!resp) goto erase_xa; + up_read(&sess->rpc_lock); kvfree(resp); - return entry->id; + return id; erase_xa: xa_erase(&sess->rpc_handle_list, entry->id); free_id: ksmbd_rpc_id_free(entry->id); free_entry: kfree(entry); + up_read(&sess->rpc_lock); return -EINVAL; } @@ -129,9 +134,11 @@ void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id) { struct ksmbd_session_rpc *entry; + down_write(&sess->rpc_lock); entry = xa_erase(&sess->rpc_handle_list, id); if (entry) __session_rpc_close(sess, entry); + up_write(&sess->rpc_lock); } int ksmbd_session_rpc_method(struct ksmbd_session *sess, int id) @@ -439,6 +446,7 @@ static struct ksmbd_session *__session_create(int protocol) sess->sequence_number = 1; rwlock_init(&sess->tree_conns_lock); atomic_set(&sess->refcnt, 2); + init_rwsem(&sess->rpc_lock); ret = __init_smb2_session(sess); if (ret) diff --git a/fs/smb/server/mgmt/user_session.h b/fs/smb/server/mgmt/user_session.h index f21348381d59..c5749d6ec715 100644 --- a/fs/smb/server/mgmt/user_session.h +++ b/fs/smb/server/mgmt/user_session.h @@ -63,6 +63,7 @@ struct ksmbd_session { rwlock_t tree_conns_lock; atomic_t refcnt; + struct rw_semaphore rpc_lock; }; static inline int test_session_flag(struct ksmbd_session *sess, int bit) From be3f1938d3e6ea8186f0de3dd95245dda4f22c1e Mon Sep 17 00:00:00 2001 From: Dave Chen Date: Tue, 15 Apr 2025 14:33:42 +0800 Subject: [PATCH 088/559] btrfs: fix COW handling in run_delalloc_nocow() In run_delalloc_nocow(), when the found btrfs_key's offset > cur_offset, it indicates a gap between the current processing region and the next file extent. The original code would directly jump to the "must_cow" label, which increments the slot and forces a fallback to COW. This behavior might skip an extent item and result in an overestimated COW fallback range. This patch modifies the logic so that when a gap is detected: - If no COW range is already being recorded (cow_start is unset), cow_start is set to cur_offset. - cur_offset is then advanced to the beginning of the next extent. - Instead of jumping to "must_cow", control flows directly to "next_slot" so that the same extent item can be reexamined properly. The change ensures that we accurately account for the extent gap and avoid accidentally extending the range that needs to fallback to COW. CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Filipe Manana Signed-off-by: Dave Chen Signed-off-by: David Sterba --- fs/btrfs/inode.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6eedfbfce1cb..312fa996a987 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2129,12 +2129,13 @@ next_slot: /* * If the found extent starts after requested offset, then - * adjust extent_end to be right before this extent begins + * adjust cur_offset to be right before this extent begins. */ if (found_key.offset > cur_offset) { - extent_end = found_key.offset; - extent_type = 0; - goto must_cow; + if (cow_start == (u64)-1) + cow_start = cur_offset; + cur_offset = found_key.offset; + goto next_slot; } /* From 48c1d1bb525b1c44b8bdc8e7ec5629cb6c2b9fc4 Mon Sep 17 00:00:00 2001 From: Penglei Jiang Date: Mon, 21 Apr 2025 08:40:29 -0700 Subject: [PATCH 089/559] btrfs: fix the inode leak in btrfs_iget() [BUG] There is a bug report that a syzbot reproducer can lead to the following busy inode at unmount time: BTRFS info (device loop1): last unmount of filesystem 1680000e-3c1e-4c46-84b6-56bd3909af50 VFS: Busy inodes after unmount of loop1 (btrfs) ------------[ cut here ]------------ kernel BUG at fs/super.c:650! Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI CPU: 0 UID: 0 PID: 48168 Comm: syz-executor Not tainted 6.15.0-rc2-00471-g119009db2674 #2 PREEMPT(full) Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:generic_shutdown_super+0x2e9/0x390 fs/super.c:650 Call Trace: kill_anon_super+0x3a/0x60 fs/super.c:1237 btrfs_kill_super+0x3b/0x50 fs/btrfs/super.c:2099 deactivate_locked_super+0xbe/0x1a0 fs/super.c:473 deactivate_super fs/super.c:506 [inline] deactivate_super+0xe2/0x100 fs/super.c:502 cleanup_mnt+0x21f/0x440 fs/namespace.c:1435 task_work_run+0x14d/0x240 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop kernel/entry/common.c:114 [inline] exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] syscall_exit_to_user_mode+0x269/0x290 kernel/entry/common.c:218 do_syscall_64+0xd4/0x250 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f [CAUSE] When btrfs_alloc_path() failed, btrfs_iget() directly returned without releasing the inode already allocated by btrfs_iget_locked(). This results the above busy inode and trigger the kernel BUG. [FIX] Fix it by calling iget_failed() if btrfs_alloc_path() failed. If we hit error inside btrfs_read_locked_inode(), it will properly call iget_failed(), so nothing to worry about. Although the iget_failed() cleanup inside btrfs_read_locked_inode() is a break of the normal error handling scheme, let's fix the obvious bug and backport first, then rework the error handling later. Reported-by: Penglei Jiang Link: https://lore.kernel.org/linux-btrfs/20250421102425.44431-1-superman.xpt@gmail.com/ Fixes: 7c855e16ab72 ("btrfs: remove conditional path allocation in btrfs_read_locked_inode()") CC: stable@vger.kernel.org # 6.13+ Reviewed-by: Qu Wenruo Signed-off-by: Penglei Jiang Signed-off-by: David Sterba --- fs/btrfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 312fa996a987..d295a37fa049 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5682,8 +5682,10 @@ struct btrfs_inode *btrfs_iget(u64 ino, struct btrfs_root *root) return inode; path = btrfs_alloc_path(); - if (!path) + if (!path) { + iget_failed(&inode->vfs_inode); return ERR_PTR(-ENOMEM); + } ret = btrfs_read_locked_inode(inode, path); btrfs_free_path(path); From e08e49d986f82c30f42ad0ed43ebbede1e1e3739 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 14 Apr 2025 14:51:58 -0400 Subject: [PATCH 090/559] btrfs: adjust subpage bit start based on sectorsize When running machines with 64k page size and a 16k nodesize we started seeing tree log corruption in production. This turned out to be because we were not writing out dirty blocks sometimes, so this in fact affects all metadata writes. When writing out a subpage EB we scan the subpage bitmap for a dirty range. If the range isn't dirty we do bit_start++; to move onto the next bit. The problem is the bitmap is based on the number of sectors that an EB has. So in this case, we have a 64k pagesize, 16k nodesize, but a 4k sectorsize. This means our bitmap is 4 bits for every node. With a 64k page size we end up with 4 nodes per page. To make this easier this is how everything looks [0 16k 32k 48k ] logical address [0 4 8 12 ] radix tree offset [ 64k page ] folio [ 16k eb ][ 16k eb ][ 16k eb ][ 16k eb ] extent buffers [ | | | | | | | | | | | | | | | | ] bitmap Now we use all of our addressing based on fs_info->sectorsize_bits, so as you can see the above our 16k eb->start turns into radix entry 4. When we find a dirty range for our eb, we correctly do bit_start += sectors_per_node, because if we start at bit 0, the next bit for the next eb is 4, to correspond to eb->start 16k. However if our range is clean, we will do bit_start++, which will now put us offset from our radix tree entries. In our case, assume that the first time we check the bitmap the block is not dirty, we increment bit_start so now it == 1, and then we loop around and check again. This time it is dirty, and we go to find that start using the following equation start = folio_start + bit_start * fs_info->sectorsize; so in the case above, eb->start 0 is now dirty, and we calculate start as 0 + 1 * fs_info->sectorsize = 4096 4096 >> 12 = 1 Now we're looking up the radix tree for 1, and we won't find an eb. What's worse is now we're using bit_start == 1, so we do bit_start += sectors_per_node, which is now 5. If that eb is dirty we will run into the same thing, we will look at an offset that is not populated in the radix tree, and now we're skipping the writeout of dirty extent buffers. The best fix for this is to not use sectorsize_bits to address nodes, but that's a larger change. Since this is a fs corruption problem fix it simply by always using sectors_per_node to increment the start bit. Fixes: c4aec299fa8f ("btrfs: introduce submit_eb_subpage() to submit a subpage metadata page") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Boris Burkov Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 197f5e51c474..8515c31f563b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2047,7 +2047,7 @@ static int submit_eb_subpage(struct folio *folio, struct writeback_control *wbc) subpage->bitmaps)) { spin_unlock_irqrestore(&subpage->lock, flags); spin_unlock(&folio->mapping->i_private_lock); - bit_start++; + bit_start += sectors_per_node; continue; } From 34024cf69c51b11c2b608f5d578626b9b1c484f5 Mon Sep 17 00:00:00 2001 From: Hao Chang Date: Tue, 22 Apr 2025 15:52:05 +0800 Subject: [PATCH 091/559] pinctrl: mediatek: Fix new design debounce issue Calculate the true offset of eint according to index. Fixes: 3ef9f710efcb ("pinctrl: mediatek: Add EINT support for multiple addresses") Signed-off-by: Hao Chang Signed-off-by: Qingliang Li Link: https://lore.kernel.org/20250422075216.14073-1-ot_chhao.chang@mediatek.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/mtk-eint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index ced4ee509b5b..b4eb2beab691 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -449,7 +449,7 @@ int mtk_eint_set_debounce(struct mtk_eint *eint, unsigned long eint_num, return -EOPNOTSUPP; virq = irq_find_mapping(eint->domain, eint_num); - eint_offset = (eint_num % 4) * 8; + eint_offset = (idx % 4) * 8; d = irq_get_irq_data(virq); set_offset = (idx / 4) * 4 + eint->regs->dbnc_set; From 12df9ec3e1955aed6a0c839f2375cd8e5d5150cf Mon Sep 17 00:00:00 2001 From: Saranya Gopal Date: Mon, 21 Apr 2025 09:43:32 +0530 Subject: [PATCH 092/559] platform/x86/intel: hid: Add Pantherlake support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Pantherlake ACPI device ID to the Intel HID driver. While there, clean up the device ID table to remove the ", 0" parts. Suggested-by: Andy Shevchenko Signed-off-by: Saranya Gopal Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250421041332.830136-1-saranya.gopal@intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/hid.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 88a1a9ff2f34..0b5e43444ed6 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -44,16 +44,17 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alex Hung"); static const struct acpi_device_id intel_hid_ids[] = { - {"INT33D5", 0}, - {"INTC1051", 0}, - {"INTC1054", 0}, - {"INTC1070", 0}, - {"INTC1076", 0}, - {"INTC1077", 0}, - {"INTC1078", 0}, - {"INTC107B", 0}, - {"INTC10CB", 0}, - {"", 0}, + { "INT33D5" }, + { "INTC1051" }, + { "INTC1054" }, + { "INTC1070" }, + { "INTC1076" }, + { "INTC1077" }, + { "INTC1078" }, + { "INTC107B" }, + { "INTC10CB" }, + { "INTC10CC" }, + { } }; MODULE_DEVICE_TABLE(acpi, intel_hid_ids); From 246f9bb62016c423972ea7f2335a8e0ed3521cde Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Sat, 19 Apr 2025 12:45:29 -0300 Subject: [PATCH 093/559] platform/x86: alienware-wmi-wmax: Add support for Alienware m15 R7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend thermal control support to Alienware m15 R7. Cc: stable@vger.kernel.org Tested-by: Romain THERY Signed-off-by: Kurt Borja Link: https://lore.kernel.org/r/20250419-m15-r7-v1-1-18c6eaa27e25@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/alienware-wmi-wmax.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index 3f9e1e986ecf..08b82c151e07 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -69,6 +69,14 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { }, .driver_data = &generic_quirks, }, + { + .ident = "Alienware m15 R7", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m15 R7"), + }, + .driver_data = &generic_quirks, + }, { .ident = "Alienware m16 R1", .matches = { From 77bdac73754e4c0c564c1ca80fe3d9c93b0e715a Mon Sep 17 00:00:00 2001 From: Pavel Nikulin Date: Fri, 18 Apr 2025 20:06:08 +0600 Subject: [PATCH 094/559] platform/x86: asus-wmi: Disable OOBE state after resume from hibernation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ASUS firmware resets OOBE state during S4 suspend, so the keyboard blinks during resume from hibernation. This patch disables OOBE state after resume from hibernation. Signed-off-by: Pavel Nikulin Link: https://lore.kernel.org/r/20250418140706.1691-1-pavel@noa-labs.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-wmi.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 38ef778e8c19..0c697b46f436 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -304,6 +304,7 @@ struct asus_wmi { u32 kbd_rgb_dev; bool kbd_rgb_state_available; + bool oobe_state_available; u8 throttle_thermal_policy_mode; u32 throttle_thermal_policy_dev; @@ -1826,7 +1827,7 @@ static int asus_wmi_led_init(struct asus_wmi *asus) goto error; } - if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_OOBE)) { + if (asus->oobe_state_available) { /* * Disable OOBE state, so that e.g. the keyboard backlight * works. @@ -4723,6 +4724,7 @@ static int asus_wmi_add(struct platform_device *pdev) asus->egpu_enable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_EGPU); asus->dgpu_disable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_DGPU); asus->kbd_rgb_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_STATE); + asus->oobe_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_OOBE); asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) && dmi_check_system(asus_ally_mcu_quirk); @@ -4970,6 +4972,13 @@ static int asus_hotk_restore(struct device *device) } if (!IS_ERR_OR_NULL(asus->kbd_led.dev)) kbd_led_update(asus); + if (asus->oobe_state_available) { + /* + * Disable OOBE state, so that e.g. the keyboard backlight + * works. + */ + asus_wmi_set_devstate(ASUS_WMI_DEVID_OOBE, 1, NULL); + } if (asus_wmi_has_fnlock_key(asus)) asus_wmi_fnlock_update(asus); From 02c6e43397c39edd0c172859bf8c851b46be09a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C5=A1per=20Nemgar?= Date: Fri, 18 Apr 2025 09:07:38 +0200 Subject: [PATCH 095/559] platform/x86: ideapad-laptop: add support for some new buttons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add entries to unsupported WMI codes in ideapad_keymap[] and one check for WMI code 0x13d to trigger platform_profile_cycle(). Signed-off-by: Gašper Nemgar Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250418070738.7171-1-gasper.nemgar@gmail.com [ij: joined nested if ()s & major tweaks to changelog] Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/ideapad-laptop.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 17a09b7784ed..ede483573fe0 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1294,6 +1294,16 @@ static const struct key_entry ideapad_keymap[] = { /* Specific to some newer models */ { KE_KEY, 0x3e | IDEAPAD_WMI_KEY, { KEY_MICMUTE } }, { KE_KEY, 0x3f | IDEAPAD_WMI_KEY, { KEY_RFKILL } }, + /* Star- (User Assignable Key) */ + { KE_KEY, 0x44 | IDEAPAD_WMI_KEY, { KEY_PROG1 } }, + /* Eye */ + { KE_KEY, 0x45 | IDEAPAD_WMI_KEY, { KEY_PROG3 } }, + /* Performance toggle also Fn+Q, handled inside ideapad_wmi_notify() */ + { KE_KEY, 0x3d | IDEAPAD_WMI_KEY, { KEY_PROG4 } }, + /* shift + prtsc */ + { KE_KEY, 0x2d | IDEAPAD_WMI_KEY, { KEY_CUT } }, + { KE_KEY, 0x29 | IDEAPAD_WMI_KEY, { KEY_TOUCHPAD_TOGGLE } }, + { KE_KEY, 0x2a | IDEAPAD_WMI_KEY, { KEY_ROOT_MENU } }, { KE_END }, }; @@ -2080,6 +2090,12 @@ static void ideapad_wmi_notify(struct wmi_device *wdev, union acpi_object *data) dev_dbg(&wdev->dev, "WMI fn-key event: 0x%llx\n", data->integer.value); + /* performance button triggered by 0x3d */ + if (data->integer.value == 0x3d && priv->dytc) { + platform_profile_cycle(); + break; + } + /* 0x02 FnLock, 0x03 Esc */ if (data->integer.value == 0x02 || data->integer.value == 0x03) ideapad_fn_lock_led_notify(priv, data->integer.value == 0x02); From 446d28584723e5d4bcdb18cf6ef87cb2bb597dd8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Apr 2025 11:23:15 +0300 Subject: [PATCH 096/559] pinctrl: mediatek: common-v1: Fix error checking in mtk_eint_init() The devm_kzalloc() function doesn't return error pointers, it returns NULL on error. Then on the next line it checks the same pointer again by mistake, "->base" instead of "->base[0]". Fixes: fe412e3a6c97 ("pinctrl: mediatek: common-v1: Fix EINT breakage on older controllers") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/aAijc10fHka1WAMX@stanley.mountain Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 7585de11854b..8596f3541265 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1018,12 +1018,12 @@ static int mtk_eint_init(struct mtk_pinctrl *pctl, struct platform_device *pdev) pctl->eint->nbase = 1; /* mtk-eint expects an array */ pctl->eint->base = devm_kzalloc(pctl->dev, sizeof(pctl->eint->base), GFP_KERNEL); - if (IS_ERR(pctl->eint->base)) + if (!pctl->eint->base) return -ENOMEM; pctl->eint->base[0] = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(pctl->eint->base)) - return PTR_ERR(pctl->eint->base); + if (IS_ERR(pctl->eint->base[0])) + return PTR_ERR(pctl->eint->base[0]); pctl->eint->irq = irq_of_parse_and_map(np, 0); if (!pctl->eint->irq) From a3d8f0a7f5e8b193db509c7191fefeed3533fc44 Mon Sep 17 00:00:00 2001 From: LongPing Wei Date: Thu, 17 Apr 2025 11:07:38 +0800 Subject: [PATCH 097/559] dm-bufio: don't schedule in atomic context A BUG was reported as below when CONFIG_DEBUG_ATOMIC_SLEEP and try_verify_in_tasklet are enabled. [ 129.444685][ T934] BUG: sleeping function called from invalid context at drivers/md/dm-bufio.c:2421 [ 129.444723][ T934] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 934, name: kworker/1:4 [ 129.444740][ T934] preempt_count: 201, expected: 0 [ 129.444756][ T934] RCU nest depth: 0, expected: 0 [ 129.444781][ T934] Preemption disabled at: [ 129.444789][ T934] [] shrink_work+0x21c/0x248 [ 129.445167][ T934] kernel BUG at kernel/sched/walt/walt_debug.c:16! [ 129.445183][ T934] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP [ 129.445204][ T934] Skip md ftrace buffer dump for: 0x1609e0 [ 129.447348][ T934] CPU: 1 PID: 934 Comm: kworker/1:4 Tainted: G W OE 6.6.56-android15-8-o-g6f82312b30b9-debug #1 1400000003000000474e5500b3187743670464e8 [ 129.447362][ T934] Hardware name: Qualcomm Technologies, Inc. Parrot QRD, Alpha-M (DT) [ 129.447373][ T934] Workqueue: dm_bufio_cache shrink_work [ 129.447394][ T934] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 129.447406][ T934] pc : android_rvh_schedule_bug+0x0/0x8 [sched_walt_debug] [ 129.447435][ T934] lr : __traceiter_android_rvh_schedule_bug+0x44/0x6c [ 129.447451][ T934] sp : ffffffc0843dbc90 [ 129.447459][ T934] x29: ffffffc0843dbc90 x28: ffffffffffffffff x27: 0000000000000c8b [ 129.447479][ T934] x26: 0000000000000040 x25: ffffff804b3d6260 x24: ffffffd816232b68 [ 129.447497][ T934] x23: ffffff805171c5b4 x22: 0000000000000000 x21: ffffffd816231900 [ 129.447517][ T934] x20: ffffff80306ba898 x19: 0000000000000000 x18: ffffffc084159030 [ 129.447535][ T934] x17: 00000000d2b5dd1f x16: 00000000d2b5dd1f x15: ffffffd816720358 [ 129.447554][ T934] x14: 0000000000000004 x13: ffffff89ef978000 x12: 0000000000000003 [ 129.447572][ T934] x11: ffffffd817a823c4 x10: 0000000000000202 x9 : 7e779c5735de9400 [ 129.447591][ T934] x8 : ffffffd81560d004 x7 : 205b5d3938373434 x6 : ffffffd8167397c8 [ 129.447610][ T934] x5 : 0000000000000000 x4 : 0000000000000001 x3 : ffffffc0843db9e0 [ 129.447629][ T934] x2 : 0000000000002f15 x1 : 0000000000000000 x0 : 0000000000000000 [ 129.447647][ T934] Call trace: [ 129.447655][ T934] android_rvh_schedule_bug+0x0/0x8 [sched_walt_debug 1400000003000000474e550080cce8a8a78606b6] [ 129.447681][ T934] __might_resched+0x190/0x1a8 [ 129.447694][ T934] shrink_work+0x180/0x248 [ 129.447706][ T934] process_one_work+0x260/0x624 [ 129.447718][ T934] worker_thread+0x28c/0x454 [ 129.447729][ T934] kthread+0x118/0x158 [ 129.447742][ T934] ret_from_fork+0x10/0x20 [ 129.447761][ T934] Code: ???????? ???????? ???????? d2b5dd1f (d4210000) [ 129.447772][ T934] ---[ end trace 0000000000000000 ]--- dm_bufio_lock will call spin_lock_bh when try_verify_in_tasklet is enabled, and __scan will be called in atomic context. Fixes: 7cd326747f46 ("dm bufio: remove dm_bufio_cond_resched()") Signed-off-by: LongPing Wei Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka --- drivers/md/dm-bufio.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 9c8ed65cd87e..f0b5a6931161 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -68,6 +68,8 @@ #define LIST_DIRTY 1 #define LIST_SIZE 2 +#define SCAN_RESCHED_CYCLE 16 + /*--------------------------------------------------------------*/ /* @@ -2424,7 +2426,12 @@ static void __scan(struct dm_bufio_client *c) atomic_long_dec(&c->need_shrink); freed++; - cond_resched(); + + if (unlikely(freed % SCAN_RESCHED_CYCLE == 0)) { + dm_bufio_unlock(c); + cond_resched(); + dm_bufio_lock(c); + } } } } From 0a533c3e4246c29d502a7e0fba0e86d80a906b04 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 22 Apr 2025 21:18:33 +0200 Subject: [PATCH 098/559] dm-integrity: fix a warning on invalid table line If we use the 'B' mode and we have an invalit table line, cancel_delayed_work_sync would trigger a warning. This commit avoids the warning. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 2a283feb3319..cc3d3897ef42 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -5164,7 +5164,7 @@ static void dm_integrity_dtr(struct dm_target *ti) BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); BUG_ON(!list_empty(&ic->wait_list)); - if (ic->mode == 'B') + if (ic->mode == 'B' && ic->bitmap_flush_work.work.func) cancel_delayed_work_sync(&ic->bitmap_flush_work); if (ic->metadata_wq) destroy_workqueue(ic->metadata_wq); From 70ad2e6bd180f94be030aef56e59693e36d945f3 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 23 Apr 2025 10:09:44 +0100 Subject: [PATCH 099/559] ASoC: cs42l43: Disable headphone clamps during type detection The headphone clamps cause fairly loud pops during type detect because they sink current from the detection process itself. Disable the clamps whilst the type detect runs, to improve the detection pop performance. Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250423090944.1504538-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43-jack.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index 20e6ab6f0d4a..6165ac16c3a9 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c @@ -654,6 +654,10 @@ static int cs42l43_run_type_detect(struct cs42l43_codec *priv) reinit_completion(&priv->type_detect); + regmap_update_bits(cs42l43->regmap, CS42L43_STEREO_MIC_CLAMP_CTRL, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK); + cs42l43_start_hs_bias(priv, true); regmap_update_bits(cs42l43->regmap, CS42L43_HS2, CS42L43_HSDET_MODE_MASK, 0x3 << CS42L43_HSDET_MODE_SHIFT); @@ -665,6 +669,9 @@ static int cs42l43_run_type_detect(struct cs42l43_codec *priv) CS42L43_HSDET_MODE_MASK, 0x2 << CS42L43_HSDET_MODE_SHIFT); cs42l43_stop_hs_bias(priv); + regmap_update_bits(cs42l43->regmap, CS42L43_STEREO_MIC_CLAMP_CTRL, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK, 0); + if (!time_left) return -ETIMEDOUT; From abf078c0a322159f5ebe2adaa0cd69dc45b1e710 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 22 Apr 2025 21:32:51 +0200 Subject: [PATCH 100/559] wifi: mac80211: restore monitor for outgoing frames This code was accidentally dropped during the cooked monitor removal, but really should've been simplified instead. Add the simple version back. Fixes: 286e69677065 ("wifi: mac80211: Drop cooked monitor support") Link: https://patch.msgid.link/20250422213251.b3d65fd0f323.Id2a6901583f7af86bbe94deb355968b238f350c6@changeid Signed-off-by: Johannes Berg --- net/mac80211/status.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index b17b3cc7fb90..a362254b310c 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -1085,7 +1085,13 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, ieee80211_report_used_skb(local, skb, false, status->ack_hwtstamp); - if (status->free_list) + /* + * This is a bit racy but we can avoid a lot of work + * with this test... + */ + if (local->tx_mntrs) + ieee80211_tx_monitor(local, skb, retry_count, status); + else if (status->free_list) list_add_tail(&skb->list, status->free_list); else dev_kfree_skb(skb); From 72bb272541808188c54d0df30b7edce14d979538 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sun, 20 Apr 2025 12:01:49 +0300 Subject: [PATCH 101/559] Revert "wifi: iwlwifi: add support for BE213" This reverts commit 16a8d9a739430bec9c11eda69226c5a39f3478aa. This device needs commit 75a3313f52b7 ("wifi: iwlwifi: make no_160 more generic"), which has a bug and is being reverted until it is fixed. Since this device wasn't shipped yet it is ok to not support it. Reported-by: Todd Brandt Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220029 Fixes: 16a8d9a73943 ("wifi: iwlwifi: add support for BE213") Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420115541.581160ae3e4b.Icecc46baee8a797c00ad04fab92d7d1114b84829@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/cfg/sc.c | 2 -- .../net/wireless/intel/iwlwifi/iwl-config.h | 1 - .../wireless/intel/iwlwifi/iwl-nvm-parse.c | 12 ++++------- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 21 +++---------------- 4 files changed, 7 insertions(+), 29 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c index 670031fd60dc..59af36960f9c 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c @@ -142,8 +142,6 @@ const struct iwl_cfg_trans_params iwl_sc_trans_cfg = { .ltr_delay = IWL_CFG_TRANS_LTR_DELAY_2500US, }; -const char iwl_sp_name[] = "Intel(R) Wi-Fi 7 BE213 160MHz"; - const struct iwl_cfg iwl_cfg_sc = { .fw_name_mac = "sc", IWL_DEVICE_SC, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index b9bd89bfdd74..7e4864c00780 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -550,7 +550,6 @@ extern const char iwl_ax231_name[]; extern const char iwl_ax411_name[]; extern const char iwl_fm_name[]; extern const char iwl_wh_name[]; -extern const char iwl_sp_name[]; extern const char iwl_gl_name[]; extern const char iwl_mtp_name[]; extern const char iwl_dr_name[]; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index cd1b0048bb6d..08269168b2fa 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -944,8 +944,7 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans, IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK); break; case NL80211_BAND_6GHZ: - if (!trans->reduced_cap_sku && - trans->bw_limit >= 320) { + if (!trans->reduced_cap_sku) { iftype_data->eht_cap.eht_cap_elem.phy_cap_info[0] |= IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ; iftype_data->eht_cap.eht_cap_elem.phy_cap_info[1] |= @@ -1099,18 +1098,15 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans, iftype_data->he_cap.he_cap_elem.phy_cap_info[0] &= ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; - if (trans->bw_limit < 320 || trans->reduced_cap_sku) { + if (trans->reduced_cap_sku) { memset(&iftype_data->eht_cap.eht_mcs_nss_supp.bw._320, 0, sizeof(iftype_data->eht_cap.eht_mcs_nss_supp.bw._320)); - iftype_data->eht_cap.eht_cap_elem.phy_cap_info[2] &= - ~IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK; - } - - if (trans->reduced_cap_sku) { iftype_data->eht_cap.eht_mcs_nss_supp.bw._80.rx_tx_mcs13_max_nss = 0; iftype_data->eht_cap.eht_mcs_nss_supp.bw._160.rx_tx_mcs13_max_nss = 0; iftype_data->eht_cap.eht_cap_elem.phy_cap_info[8] &= ~IEEE80211_EHT_PHY_CAP8_RX_4096QAM_WIDER_BW_DL_OFDMA; + iftype_data->eht_cap.eht_cap_elem.phy_cap_info[2] &= + ~IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK; } } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 93446c374008..03f7eb46bbc7 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1187,13 +1187,8 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc, iwl_wh_name), - _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_MAC_TYPE_SC, IWL_CFG_ANY, - IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - 160, IWL_CFG_ANY, IWL_CFG_ANY, - iwl_cfg_sc, iwl_sp_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, @@ -1207,13 +1202,8 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2, iwl_wh_name), - _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, - IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - 160, IWL_CFG_ANY, IWL_CFG_ANY, - iwl_cfg_sc2, iwl_sp_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, @@ -1227,13 +1217,8 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2f, iwl_wh_name), - _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, - IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - 160, IWL_CFG_ANY, IWL_CFG_ANY, - iwl_cfg_sc2f, iwl_sp_name), /* Dr */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, From 64dc5d5e341d6145cce65e35bdfa0d6ab9fc0a75 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sun, 20 Apr 2025 12:01:50 +0300 Subject: [PATCH 102/559] Revert "wifi: iwlwifi: make no_160 more generic" This reverts commit 75a3313f52b7e08e7e73746f69a68c2b7c28bb2b. The indication of the BW limitation in the sub-device ID is not applicable for Killer devices. For those devices, bw_limit will hold a random value, so a matching dev_info might not be found, which leads to a probe failure. Until it is properly fixed, revert this. Reported-by: Todd Brandt Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220029 Fixes: 75a3313f52b7 ("wifi: iwlwifi: make no_160 more generic") Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420115541.36dd3007151e.I66b6b78db09bfea12ae84dd85603cf1583271474@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/iwl-config.h | 15 +- .../wireless/intel/iwlwifi/iwl-nvm-parse.c | 4 +- .../net/wireless/intel/iwlwifi/iwl-trans.h | 7 +- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 206 +++++++++--------- .../wireless/intel/iwlwifi/tests/devinfo.c | 15 +- 5 files changed, 118 insertions(+), 129 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 7e4864c00780..acafee538b8a 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -2,7 +2,7 @@ /* * Copyright (C) 2005-2014, 2018-2021 Intel Corporation * Copyright (C) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2025 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #ifndef __IWL_CONFIG_H__ #define __IWL_CONFIG_H__ @@ -451,8 +451,11 @@ struct iwl_cfg { #define IWL_CFG_RF_ID_HR 0x7 #define IWL_CFG_RF_ID_HR1 0x4 -#define IWL_CFG_BW_NO_LIM (U16_MAX - 1) -#define IWL_CFG_BW_ANY U16_MAX +#define IWL_CFG_NO_160 0x1 +#define IWL_CFG_160 0x0 + +#define IWL_CFG_NO_320 0x1 +#define IWL_CFG_320 0x0 #define IWL_CFG_CORES_BT 0x0 #define IWL_CFG_CORES_BT_GNSS 0x5 @@ -464,7 +467,7 @@ struct iwl_cfg { #define IWL_CFG_IS_JACKET 0x1 #define IWL_SUBDEVICE_RF_ID(subdevice) ((u16)((subdevice) & 0x00F0) >> 4) -#define IWL_SUBDEVICE_BW_LIM(subdevice) ((u16)((subdevice) & 0x0200) >> 9) +#define IWL_SUBDEVICE_NO_160(subdevice) ((u16)((subdevice) & 0x0200) >> 9) #define IWL_SUBDEVICE_CORES(subdevice) ((u16)((subdevice) & 0x1C00) >> 10) struct iwl_dev_info { @@ -472,10 +475,10 @@ struct iwl_dev_info { u16 subdevice; u16 mac_type; u16 rf_type; - u16 bw_limit; u8 mac_step; u8 rf_step; u8 rf_id; + u8 no_160; u8 cores; u8 cdb; u8 jacket; @@ -489,7 +492,7 @@ extern const unsigned int iwl_dev_info_table_size; const struct iwl_dev_info * iwl_pci_find_dev_info(u16 device, u16 subsystem_device, u16 mac_type, u8 mac_step, u16 rf_type, u8 cdb, - u8 jacket, u8 rf_id, u8 bw_limit, u8 cores, u8 rf_step); + u8 jacket, u8 rf_id, u8 no_160, u8 cores, u8 rf_step); extern const struct pci_device_id iwl_hw_card_ids[]; #endif diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 08269168b2fa..c381511e9ec6 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2005-2014, 2018-2023, 2025 Intel Corporation + * Copyright (C) 2005-2014, 2018-2023 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -1094,7 +1094,7 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans, iftype_data->eht_cap.eht_mcs_nss_supp.bw._320.rx_tx_mcs13_max_nss = 0; } - if (trans->bw_limit < 160) + if (trans->no_160) iftype_data->he_cap.he_cap_elem.phy_cap_info[0] &= ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 25fb4c50e38b..18698f0bd2e4 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2005-2014, 2018-2023, 2025 Intel Corporation + * Copyright (C) 2005-2014, 2018-2023 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -876,7 +876,7 @@ struct iwl_txq { * only valid for discrete (not integrated) NICs * @invalid_tx_cmd: invalid TX command buffer * @reduced_cap_sku: reduced capability supported SKU - * @bw_limit: the max bandwidth + * @no_160: device not supporting 160 MHz * @step_urm: STEP is in URM, no support for MCS>9 in 320 MHz * @restart: restart worker data * @restart.wk: restart worker @@ -911,8 +911,7 @@ struct iwl_trans { char hw_id_str[52]; u32 sku_id[3]; bool reduced_cap_sku; - u16 bw_limit; - bool step_urm; + u8 no_160:1, step_urm:1; u8 dsbr_urm_fw_dependent:1, dsbr_urm_permanent:1; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 03f7eb46bbc7..6c22c95f28d5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2005-2014, 2018-2025 Intel Corporation + * Copyright (C) 2005-2014, 2018-2024 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -552,17 +552,16 @@ MODULE_DEVICE_TABLE(pci, iwl_hw_card_ids); EXPORT_SYMBOL_IF_IWLWIFI_KUNIT(iwl_hw_card_ids); #define _IWL_DEV_INFO(_device, _subdevice, _mac_type, _mac_step, _rf_type, \ - _rf_id, _rf_step, _bw_limit, _cores, _cdb, _cfg, _name) \ + _rf_id, _rf_step, _no_160, _cores, _cdb, _cfg, _name) \ { .device = (_device), .subdevice = (_subdevice), .cfg = &(_cfg), \ .name = _name, .mac_type = _mac_type, .rf_type = _rf_type, .rf_step = _rf_step, \ - .bw_limit = _bw_limit, .cores = _cores, .rf_id = _rf_id, \ + .no_160 = _no_160, .cores = _cores, .rf_id = _rf_id, \ .mac_step = _mac_step, .cdb = _cdb, .jacket = IWL_CFG_ANY } #define IWL_DEV_INFO(_device, _subdevice, _cfg, _name) \ _IWL_DEV_INFO(_device, _subdevice, IWL_CFG_ANY, IWL_CFG_ANY, \ - IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, \ - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_ANY, \ - _cfg, _name) + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, \ + IWL_CFG_ANY, _cfg, _name) VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { #if IS_ENABLED(CONFIG_IWLMVM) @@ -725,66 +724,66 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9462_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9560_name), _IWL_DEV_INFO(0x2526, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT_GNSS, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT_GNSS, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9270_160_name), _IWL_DEV_INFO(0x2526, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT_GNSS, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT_GNSS, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9270_name), _IWL_DEV_INFO(0x271B, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH1, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9162_160_name), _IWL_DEV_INFO(0x271B, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH1, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9162_name), _IWL_DEV_INFO(0x2526, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9260_160_name), _IWL_DEV_INFO(0x2526, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9260_name), /* Qu with Jf */ @@ -792,132 +791,132 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9462_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9560_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1551, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9560_killer_1550s_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1552, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9560_killer_1550i_name), /* Qu C step */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9462_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9560_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1551, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9560_killer_1550s_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1552, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9560_killer_1550i_name), /* QuZ */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9462_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9560_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1551, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9560_killer_1550s_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1552, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9560_killer_1550i_name), /* Qu with Hr */ @@ -925,189 +924,189 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_qu_b0_hr1_b0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_qu_b0_hr_b0, iwl_ax203_name), /* Qu C step */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_qu_c0_hr1_b0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_qu_c0_hr_b0, iwl_ax203_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_qu_c0_hr_b0, iwl_ax201_name), /* QuZ */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_quz_a0_hr1_b0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, SILICON_B_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_quz_a0_hr_b0, iwl_ax203_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, SILICON_B_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_quz_a0_hr_b0, iwl_ax201_name), /* Ma */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_MA, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_ma, iwl_ax201_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_MA, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_ma, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_MA, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_ma, iwl_ax231_name), /* So with Hr */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax203_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax201_name), /* So-F with Hr */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax203_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax201_name), /* So-F with Gf */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_CDB, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_name), /* SoF with JF2 */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9560_name), /* SoF with JF */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9462_name), /* So with GF */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_CDB, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_name), /* So with JF2 */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9560_name), /* So with JF */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9462_name), #endif /* CONFIG_IWLMVM */ @@ -1116,13 +1115,13 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_ax201_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, @@ -1134,104 +1133,104 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_wh_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ_W, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_ax201_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ_W, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ_W, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_fm_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ_W, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_wh_name), /* Ga (Gl) */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_GL, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_320, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_gl, iwl_gl_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_GL, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - 160, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_320, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_gl, iwl_mtp_name), /* Sc */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc, iwl_fm_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc, iwl_wh_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2, iwl_fm_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2, iwl_wh_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2f, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2f, iwl_fm_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2f, iwl_wh_name), /* Dr */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_DR, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_dr, iwl_dr_name), /* Br */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BR, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_br, iwl_br_name), #endif /* CONFIG_IWLMLD */ }; @@ -1383,7 +1382,7 @@ out: VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info * iwl_pci_find_dev_info(u16 device, u16 subsystem_device, u16 mac_type, u8 mac_step, u16 rf_type, u8 cdb, - u8 jacket, u8 rf_id, u8 bw_limit, u8 cores, u8 rf_step) + u8 jacket, u8 rf_id, u8 no_160, u8 cores, u8 rf_step) { int num_devices = ARRAY_SIZE(iwl_dev_info_table); int i; @@ -1426,15 +1425,8 @@ iwl_pci_find_dev_info(u16 device, u16 subsystem_device, dev_info->rf_id != rf_id) continue; - /* - * Check that bw_limit have the same "boolean" value since - * IWL_SUBDEVICE_BW_LIM can only return a boolean value and - * dev_info->bw_limit encodes a non-boolean value. - * dev_info->bw_limit == IWL_CFG_BW_NO_LIM must be equal to - * !bw_limit to have a match. - */ - if (dev_info->bw_limit != IWL_CFG_BW_ANY && - (dev_info->bw_limit == IWL_CFG_BW_NO_LIM) == !!bw_limit) + if (dev_info->no_160 != (u8)IWL_CFG_ANY && + dev_info->no_160 != no_160) continue; if (dev_info->cores != (u8)IWL_CFG_ANY && @@ -1572,13 +1564,13 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) CSR_HW_RFID_IS_CDB(iwl_trans->hw_rf_id), CSR_HW_RFID_IS_JACKET(iwl_trans->hw_rf_id), IWL_SUBDEVICE_RF_ID(pdev->subsystem_device), - IWL_SUBDEVICE_BW_LIM(pdev->subsystem_device), + IWL_SUBDEVICE_NO_160(pdev->subsystem_device), IWL_SUBDEVICE_CORES(pdev->subsystem_device), CSR_HW_RFID_STEP(iwl_trans->hw_rf_id)); if (dev_info) { iwl_trans->cfg = dev_info->cfg; iwl_trans->name = dev_info->name; - iwl_trans->bw_limit = dev_info->bw_limit; + iwl_trans->no_160 = dev_info->no_160 == IWL_CFG_NO_160; } #if IS_ENABLED(CONFIG_IWLMVM) diff --git a/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c b/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c index 7ef5e89c6af2..d0bda23c628a 100644 --- a/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c +++ b/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c @@ -2,7 +2,7 @@ /* * KUnit tests for the iwlwifi device info table * - * Copyright (C) 2023-2025 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation */ #include #include @@ -13,9 +13,9 @@ MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); static void iwl_pci_print_dev_info(const char *pfx, const struct iwl_dev_info *di) { - printk(KERN_DEBUG "%sdev=%.4x,subdev=%.4x,mac_type=%.4x,mac_step=%.4x,rf_type=%.4x,cdb=%d,jacket=%d,rf_id=%.2x,bw_limit=%d,cores=%.2x\n", + printk(KERN_DEBUG "%sdev=%.4x,subdev=%.4x,mac_type=%.4x,mac_step=%.4x,rf_type=%.4x,cdb=%d,jacket=%d,rf_id=%.2x,no_160=%d,cores=%.2x\n", pfx, di->device, di->subdevice, di->mac_type, di->mac_step, - di->rf_type, di->cdb, di->jacket, di->rf_id, di->bw_limit, + di->rf_type, di->cdb, di->jacket, di->rf_id, di->no_160, di->cores); } @@ -31,13 +31,8 @@ static void devinfo_table_order(struct kunit *test) di->mac_type, di->mac_step, di->rf_type, di->cdb, di->jacket, di->rf_id, - di->bw_limit != IWL_CFG_BW_NO_LIM, - di->cores, di->rf_step); - if (!ret) { - iwl_pci_print_dev_info("No entry found for: ", di); - KUNIT_FAIL(test, - "No entry found for entry at index %d\n", idx); - } else if (ret != di) { + di->no_160, di->cores, di->rf_step); + if (ret != di) { iwl_pci_print_dev_info("searched: ", di); iwl_pci_print_dev_info("found: ", ret); KUNIT_FAIL(test, From 4f7a07791944e57ea5f12ce03939e3ad0fd50504 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sun, 20 Apr 2025 09:59:55 +0300 Subject: [PATCH 103/559] wifi: iwlwifi: mld: properly handle async notification in op mode start From the moment that we have ALIVE, we can receive notification that are handled asynchronously. Some notifications (for example iwl_rfi_support_notif) requires an operational FW. So we need to make sure that they were handled in iwl_op_mode_mld_start before we stop the FW. Flush the async_handlers_wk there to achieve that. Also, if loading the FW in op mode start failed, we need to cancel these notifications, as they are from a dead FW. More than that, not doing so can cause us to access freed memory if async_handlers_wk is executed after ieee80211_free_hw is called. Fix this by canceling all async notifications if a failure occurred in init (after ALIVE). Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420095642.1a8579662437.Ifd77d9c1a29fdd278b0a7bfc2709dd5d5e5efdb1@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/fw.c | 13 ++++++++++--- drivers/net/wireless/intel/iwlwifi/mld/mld.c | 5 +++++ drivers/net/wireless/intel/iwlwifi/mld/mld.h | 5 ----- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/fw.c b/drivers/net/wireless/intel/iwlwifi/mld/fw.c index 62da137e1024..4b083d447ee2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/fw.c @@ -333,19 +333,22 @@ int iwl_mld_load_fw(struct iwl_mld *mld) ret = iwl_trans_start_hw(mld->trans); if (ret) - return ret; + goto err; ret = iwl_mld_run_fw_init_sequence(mld); if (ret) - return ret; + goto err; ret = iwl_mld_init_mcc(mld); if (ret) - return ret; + goto err; mld->fw_status.running = true; return 0; +err: + iwl_mld_stop_fw(mld); + return ret; } void iwl_mld_stop_fw(struct iwl_mld *mld) @@ -358,6 +361,10 @@ void iwl_mld_stop_fw(struct iwl_mld *mld) iwl_trans_stop_device(mld->trans); + wiphy_work_cancel(mld->wiphy, &mld->async_handlers_wk); + + iwl_mld_purge_async_handlers_list(mld); + mld->fw_status.running = false; } diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index d4a99ae64074..cfdf52b43c68 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ -417,6 +417,11 @@ iwl_op_mode_mld_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, goto free_hw; } + /* We are about to stop the FW. Notifications may require an + * operational FW, so handle them all here before we stop. + */ + wiphy_work_flush(mld->wiphy, &mld->async_handlers_wk); + iwl_mld_stop_fw(mld); wiphy_unlock(mld->wiphy); diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.h b/drivers/net/wireless/intel/iwlwifi/mld/mld.h index 5eceaaf7696d..a4a16da6ebf3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.h @@ -298,11 +298,6 @@ iwl_cleanup_mld(struct iwl_mld *mld) #endif iwl_mld_low_latency_restart_cleanup(mld); - - /* Empty the list of async notification handlers so we won't process - * notifications from the dead fw after the reconfig flow. - */ - iwl_mld_purge_async_handlers_list(mld); } enum iwl_power_scheme { From c155f7c3ad1e70a1e203047b20e3bca235ada207 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sun, 20 Apr 2025 09:59:56 +0300 Subject: [PATCH 104/559] wifi: iwlwifi: mld: inform trans on init failure If starting the op mode failed, the opmode memory is being freed, so trans->op_mode needs to be NULLified. Otherwise, trans will access already freed memory. Call iwl_trans_op_mode_leave in that case. Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420095642.3331d1686556.Ifaf15bdd8ef8c59e04effbd2e7aa0034b30eeacb@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/mld.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index cfdf52b43c68..4a0842a46a8d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ -414,7 +414,7 @@ iwl_op_mode_mld_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, wiphy_unlock(mld->wiphy); rtnl_unlock(); iwl_fw_flush_dumps(&mld->fwrt); - goto free_hw; + goto err; } /* We are about to stop the FW. Notifications may require an @@ -460,7 +460,8 @@ leds_exit: iwl_mld_leds_exit(mld); free_nvm: kfree(mld->nvm_data); -free_hw: +err: + iwl_trans_op_mode_leave(mld->trans); ieee80211_free_hw(mld->hw); return ERR_PTR(ret); } From d1ee2c1922566257cd6cc4ac7c21974d708fea62 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Sun, 20 Apr 2025 09:59:57 +0300 Subject: [PATCH 105/559] wifi: iwlwifi: mld: only create debugfs symlink if it does not exist When mac80211 switches between non-MLO and MLO it will recreate the debugfs directories. This results in the add_if_debugfs handler being called multiple times. As the convenience symlink is created in the mld debugfs directory and not the vif, it will not be removed by mac80211 when this happens and still exists. Add a check and only create the convenience symlink if we have not yet done so. Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420095642.2490696f032a.I74319c7cf18f7e16a3d331cb96e38504b9fbab66@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/debugfs.c | 5 +++-- drivers/net/wireless/intel/iwlwifi/mld/mac80211.c | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c index 89d95e9b4f30..93f9f78e4276 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c @@ -949,8 +949,9 @@ void iwl_mld_add_vif_debugfs(struct ieee80211_hw *hw, snprintf(name, sizeof(name), "%pd", vif->debugfs_dir); snprintf(target, sizeof(target), "../../../%pd3/iwlmld", vif->debugfs_dir); - mld_vif->dbgfs_slink = - debugfs_create_symlink(name, mld->debugfs_dir, target); + if (!mld_vif->dbgfs_slink) + mld_vif->dbgfs_slink = + debugfs_create_symlink(name, mld->debugfs_dir, target); if (iwlmld_mod_params.power_scheme != IWL_POWER_SCHEME_CAM && vif->type == NL80211_IFTYPE_STATION) { diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 99e13cfd1e5f..68d97d3b8f02 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -651,6 +651,7 @@ void iwl_mld_mac80211_remove_interface(struct ieee80211_hw *hw, #ifdef CONFIG_IWLWIFI_DEBUGFS debugfs_remove(iwl_mld_vif_from_mac80211(vif)->dbgfs_slink); + iwl_mld_vif_from_mac80211(vif)->dbgfs_slink = NULL; #endif iwl_mld_rm_vif(mld, vif); From d49437a6afc707951e5767ef70c9726b6c05da08 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 20 Apr 2025 09:59:58 +0300 Subject: [PATCH 106/559] wifi: iwlwifi: back off on continuous errors When errors occur repeatedly, the driver shouldn't go into a tight loop trying to reset the device. Implement the backoff I had already defined IWL_TRANS_RESET_DELAY for, but clearly forgotten the implementation of. Fixes: 9a2f13c40c63 ("wifi: iwlwifi: implement reset escalation") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420095642.8816e299efa2.I82cde34e2345a2b33b1f03dbb040f5ad3439a5aa@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/iwl-trans.c | 27 ++++++++++++++----- .../net/wireless/intel/iwlwifi/iwl-trans.h | 7 +++-- .../net/wireless/intel/iwlwifi/pcie/trans.c | 3 ++- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index c1607b6d0759..6125fe70ce72 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -21,6 +21,7 @@ struct iwl_trans_dev_restart_data { struct list_head list; unsigned int restart_count; time64_t last_error; + bool backoff; char name[]; }; @@ -125,13 +126,20 @@ iwl_trans_determine_restart_mode(struct iwl_trans *trans) if (!data) return at_least; - if (ktime_get_boottime_seconds() - data->last_error >= + if (!data->backoff && + ktime_get_boottime_seconds() - data->last_error >= IWL_TRANS_RESET_OK_TIME) data->restart_count = 0; index = data->restart_count; - if (index >= ARRAY_SIZE(escalation_list)) + if (index >= ARRAY_SIZE(escalation_list)) { index = ARRAY_SIZE(escalation_list) - 1; + if (!data->backoff) { + data->backoff = true; + return IWL_RESET_MODE_BACKOFF; + } + data->backoff = false; + } return max(at_least, escalation_list[index]); } @@ -140,7 +148,8 @@ iwl_trans_determine_restart_mode(struct iwl_trans *trans) static void iwl_trans_restart_wk(struct work_struct *wk) { - struct iwl_trans *trans = container_of(wk, typeof(*trans), restart.wk); + struct iwl_trans *trans = container_of(wk, typeof(*trans), + restart.wk.work); struct iwl_trans_reprobe *reprobe; enum iwl_reset_mode mode; @@ -168,6 +177,12 @@ static void iwl_trans_restart_wk(struct work_struct *wk) return; mode = iwl_trans_determine_restart_mode(trans); + if (mode == IWL_RESET_MODE_BACKOFF) { + IWL_ERR(trans, "Too many device errors - delay next reset\n"); + queue_delayed_work(system_unbound_wq, &trans->restart.wk, + IWL_TRANS_RESET_DELAY); + return; + } iwl_trans_inc_restart_count(trans->dev); @@ -227,7 +242,7 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, trans->dev = dev; trans->num_rx_queues = 1; - INIT_WORK(&trans->restart.wk, iwl_trans_restart_wk); + INIT_DELAYED_WORK(&trans->restart.wk, iwl_trans_restart_wk); return trans; } @@ -271,7 +286,7 @@ int iwl_trans_init(struct iwl_trans *trans) void iwl_trans_free(struct iwl_trans *trans) { - cancel_work_sync(&trans->restart.wk); + cancel_delayed_work_sync(&trans->restart.wk); kmem_cache_destroy(trans->dev_cmd_pool); } @@ -403,7 +418,7 @@ void iwl_trans_op_mode_leave(struct iwl_trans *trans) iwl_trans_pcie_op_mode_leave(trans); - cancel_work_sync(&trans->restart.wk); + cancel_delayed_work_sync(&trans->restart.wk); trans->op_mode = NULL; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 18698f0bd2e4..ce4954b0d524 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -961,7 +961,7 @@ struct iwl_trans { struct iwl_dma_ptr invalid_tx_cmd; struct { - struct work_struct wk; + struct delayed_work wk; struct iwl_fw_error_dump_mode mode; bool during_reset; } restart; @@ -1162,7 +1162,7 @@ static inline void iwl_trans_schedule_reset(struct iwl_trans *trans, */ trans->restart.during_reset = test_bit(STATUS_IN_SW_RESET, &trans->status); - queue_work(system_unbound_wq, &trans->restart.wk); + queue_delayed_work(system_unbound_wq, &trans->restart.wk, 0); } static inline void iwl_trans_fw_error(struct iwl_trans *trans, @@ -1261,6 +1261,9 @@ enum iwl_reset_mode { IWL_RESET_MODE_RESCAN, IWL_RESET_MODE_FUNC_RESET, IWL_RESET_MODE_PROD_RESET, + + /* keep last - special backoff value */ + IWL_RESET_MODE_BACKOFF, }; void iwl_trans_pcie_reset(struct iwl_trans *trans, enum iwl_reset_mode mode); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index c917ed4c19bc..b1ccace7377f 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2351,7 +2351,8 @@ void iwl_trans_pcie_reset(struct iwl_trans *trans, enum iwl_reset_mode mode) struct iwl_trans_pcie_removal *removal; char _msg = 0, *msg = &_msg; - if (WARN_ON(mode < IWL_RESET_MODE_REMOVE_ONLY)) + if (WARN_ON(mode < IWL_RESET_MODE_REMOVE_ONLY || + mode == IWL_RESET_MODE_BACKOFF)) return; if (test_bit(STATUS_TRANS_DEAD, &trans->status)) From 60d418e8540402f4732cce4e8df428e747d79e47 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 20 Apr 2025 09:59:59 +0300 Subject: [PATCH 107/559] wifi: iwlwifi: mld: fix BAID validity check Perhaps IWL_FW_CHECK() is a bit misnamed, but it just returns the value of the inner condition. Therefore, the current code skips the actual function when it has the BAID data and makes it crash later when it doesn't. Fix the logic. Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Johannes Berg Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420095642.9c0b84c44c3b.Ied236258854b149960eb357ec61bf3a572503fbc@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/agg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/agg.c b/drivers/net/wireless/intel/iwlwifi/mld/agg.c index db9e0f04f4b7..687a9450ac98 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/agg.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/agg.c @@ -124,9 +124,9 @@ void iwl_mld_handle_bar_frame_release_notif(struct iwl_mld *mld, rcu_read_lock(); baid_data = rcu_dereference(mld->fw_id_to_ba[baid]); - if (!IWL_FW_CHECK(mld, !baid_data, - "Got valid BAID %d but not allocated, invalid BAR release!\n", - baid)) + if (IWL_FW_CHECK(mld, !baid_data, + "Got valid BAID %d but not allocated, invalid BAR release!\n", + baid)) goto out_unlock; if (IWL_FW_CHECK(mld, tid != baid_data->tid || From 15220a257319ffe3bf95796326dfe0aacdbeb1c4 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 20 Apr 2025 10:00:00 +0300 Subject: [PATCH 108/559] wifi: iwlwifi: don't warn if the NIC is gone in resume Some BIOSes decide to power gate the WLAN device during S3. Since iwlwifi doesn't expect this, it gets very noisy reporting that the device is no longer available. Wifi is still available because iwlwifi recovers, but it spews scary prints in the log. Fix that by failing gracefully. Fixes: e8bb19c1d590 ("wifi: iwlwifi: support fast resume") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219597 Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420095642.d8d58146c829.I569ca15eaaa774d633038a749cc6ec7448419714@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/iwl-trans.c | 1 - drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 20 ++++++++++++++++--- .../wireless/intel/iwlwifi/pcie/internal.h | 9 +++++---- .../net/wireless/intel/iwlwifi/pcie/trans.c | 13 +++++++++--- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 2 +- 5 files changed, 33 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index 6125fe70ce72..e7b2e08645ef 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -555,7 +555,6 @@ void __releases(nic_access) iwl_trans_release_nic_access(struct iwl_trans *trans) { iwl_trans_pcie_release_nic_access(trans); - __release(nic_access); } IWL_EXPORT_SYMBOL(iwl_trans_release_nic_access); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 6c22c95f28d5..3a605e7c070e 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1737,10 +1737,24 @@ static int _iwl_pci_resume(struct device *device, bool restore) * need to reset it completely. * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan, * so assume that any bits there mean that the device is usable. + * For older devices, just try silently to grab the NIC. */ - if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ && - !iwl_read32(trans, CSR_FUNC_SCRATCH)) - device_was_powered_off = true; + if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) { + if (!iwl_read32(trans, CSR_FUNC_SCRATCH)) + device_was_powered_off = true; + } else { + /* + * bh are re-enabled by iwl_trans_pcie_release_nic_access, + * so re-enable them if _iwl_trans_pcie_grab_nic_access fails. + */ + local_bh_disable(); + if (_iwl_trans_pcie_grab_nic_access(trans, true)) { + iwl_trans_pcie_release_nic_access(trans); + } else { + device_was_powered_off = true; + local_bh_enable(); + } + } if (restore || device_was_powered_off) { trans->state = IWL_TRANS_NO_FW; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 45460f93d24a..114a9195ad7f 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -558,10 +558,10 @@ void iwl_trans_pcie_free(struct iwl_trans *trans); void iwl_trans_pcie_free_pnvm_dram_regions(struct iwl_dram_regions *dram_regions, struct device *dev); -bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans); -#define _iwl_trans_pcie_grab_nic_access(trans) \ +bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, bool silent); +#define _iwl_trans_pcie_grab_nic_access(trans, silent) \ __cond_lock(nic_access_nobh, \ - likely(__iwl_trans_pcie_grab_nic_access(trans))) + likely(__iwl_trans_pcie_grab_nic_access(trans, silent))) void iwl_trans_pcie_check_product_reset_status(struct pci_dev *pdev); void iwl_trans_pcie_check_product_reset_mode(struct pci_dev *pdev); @@ -1105,7 +1105,8 @@ void iwl_trans_pcie_set_bits_mask(struct iwl_trans *trans, u32 reg, int iwl_trans_pcie_read_config32(struct iwl_trans *trans, u32 ofs, u32 *val); bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans); -void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans); +void __releases(nic_access_nobh) +iwl_trans_pcie_release_nic_access(struct iwl_trans *trans); /* transport gen 1 exported functions */ void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index b1ccace7377f..102a6123bba0 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2406,7 +2406,7 @@ EXPORT_SYMBOL(iwl_trans_pcie_reset); * This version doesn't disable BHs but rather assumes they're * already disabled. */ -bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans) +bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, bool silent) { int ret; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@ -2458,6 +2458,11 @@ bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans) if (unlikely(ret < 0)) { u32 cntrl = iwl_read32(trans, CSR_GP_CNTRL); + if (silent) { + spin_unlock(&trans_pcie->reg_lock); + return false; + } + WARN_ONCE(1, "Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n", cntrl); @@ -2489,7 +2494,7 @@ bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans) bool ret; local_bh_disable(); - ret = __iwl_trans_pcie_grab_nic_access(trans); + ret = __iwl_trans_pcie_grab_nic_access(trans, false); if (ret) { /* keep BHs disabled until iwl_trans_pcie_release_nic_access */ return ret; @@ -2498,7 +2503,8 @@ bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans) return false; } -void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans) +void __releases(nic_access_nobh) +iwl_trans_pcie_release_nic_access(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@ -2525,6 +2531,7 @@ void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans) * scheduled on different CPUs (after we drop reg_lock). */ out: + __release(nic_access_nobh); spin_unlock_bh(&trans_pcie->reg_lock); } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index bb90bcfc6763..9fc4e98693eb 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1021,7 +1021,7 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans, * returned. This needs to be done only on NICs that have * apmg_wake_up_wa set (see above.) */ - if (!_iwl_trans_pcie_grab_nic_access(trans)) + if (!_iwl_trans_pcie_grab_nic_access(trans, false)) return -EIO; /* From a17821321a9b42f26e77335cd525fee72dc1cd63 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 20 Apr 2025 10:00:01 +0300 Subject: [PATCH 109/559] wifi: iwlwifi: fix the check for the SCRATCH register upon resume We can't rely on the SCRATCH register being 0 on platform that power gate the NIC in S3. Even in those platforms, the SCRATCH register is still returning 0x1010000. Make sure that we understand that those platforms have powered off the device. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219597 Fixes: cb347bd29d0d ("wifi: iwlwifi: mvm: fix hibernation") Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420095642.a7e082ee785c.I9418d76f860f54261cfa89e1f7ac10300904ba40@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index be9e464c9b7b..3ff493e920d2 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -148,6 +148,7 @@ * during a error FW error. */ #define CSR_FUNC_SCRATCH_INIT_VALUE (0x01010101) +#define CSR_FUNC_SCRATCH_POWER_OFF_MASK 0xFFFF /* Bits for CSR_HW_IF_CONFIG_REG */ #define CSR_HW_IF_CONFIG_REG_MSK_MAC_STEP_DASH (0x0000000F) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 3a605e7c070e..debeea2b3ae5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1736,11 +1736,13 @@ static int _iwl_pci_resume(struct device *device, bool restore) * Scratch value was altered, this means the device was powered off, we * need to reset it completely. * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan, - * so assume that any bits there mean that the device is usable. + * but not bits [15:8]. So if we have bits set in lower word, assume + * the device is alive. * For older devices, just try silently to grab the NIC. */ if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) { - if (!iwl_read32(trans, CSR_FUNC_SCRATCH)) + if (!(iwl_read32(trans, CSR_FUNC_SCRATCH) & + CSR_FUNC_SCRATCH_POWER_OFF_MASK)) device_was_powered_off = true; } else { /* From 0fb15ae3b0a9221be01715dac0335647c79f3362 Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Fri, 21 Mar 2025 21:52:25 +0300 Subject: [PATCH 110/559] wifi: plfxlc: Remove erroneous assert in plfxlc_mac_release plfxlc_mac_release() asserts that mac->lock is held. This assertion is incorrect, because even if it was possible, it would not be the valid behaviour. The function is used when probe fails or after the device is disconnected. In both cases mac->lock can not be held as the driver is not working with the device at the moment. All functions that use mac->lock unlock it just after it was held. There is also no need to hold mac->lock for plfxlc_mac_release() itself, as mac data is not affected, except for mac->flags, which is modified atomically. This bug leads to the following warning: ================================================================ WARNING: CPU: 0 PID: 127 at drivers/net/wireless/purelifi/plfxlc/mac.c:106 plfxlc_mac_release+0x7d/0xa0 Modules linked in: CPU: 0 PID: 127 Comm: kworker/0:2 Not tainted 6.1.124-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Workqueue: usb_hub_wq hub_event RIP: 0010:plfxlc_mac_release+0x7d/0xa0 drivers/net/wireless/purelifi/plfxlc/mac.c:106 Call Trace: probe+0x941/0xbd0 drivers/net/wireless/purelifi/plfxlc/usb.c:694 usb_probe_interface+0x5c0/0xaf0 drivers/usb/core/driver.c:396 really_probe+0x2ab/0xcb0 drivers/base/dd.c:639 __driver_probe_device+0x1a2/0x3d0 drivers/base/dd.c:785 driver_probe_device+0x50/0x420 drivers/base/dd.c:815 __device_attach_driver+0x2cf/0x510 drivers/base/dd.c:943 bus_for_each_drv+0x183/0x200 drivers/base/bus.c:429 __device_attach+0x359/0x570 drivers/base/dd.c:1015 bus_probe_device+0xba/0x1e0 drivers/base/bus.c:489 device_add+0xb48/0xfd0 drivers/base/core.c:3696 usb_set_configuration+0x19dd/0x2020 drivers/usb/core/message.c:2165 usb_generic_driver_probe+0x84/0x140 drivers/usb/core/generic.c:238 usb_probe_device+0x130/0x260 drivers/usb/core/driver.c:293 really_probe+0x2ab/0xcb0 drivers/base/dd.c:639 __driver_probe_device+0x1a2/0x3d0 drivers/base/dd.c:785 driver_probe_device+0x50/0x420 drivers/base/dd.c:815 __device_attach_driver+0x2cf/0x510 drivers/base/dd.c:943 bus_for_each_drv+0x183/0x200 drivers/base/bus.c:429 __device_attach+0x359/0x570 drivers/base/dd.c:1015 bus_probe_device+0xba/0x1e0 drivers/base/bus.c:489 device_add+0xb48/0xfd0 drivers/base/core.c:3696 usb_new_device+0xbdd/0x18f0 drivers/usb/core/hub.c:2620 hub_port_connect drivers/usb/core/hub.c:5477 [inline] hub_port_connect_change drivers/usb/core/hub.c:5617 [inline] port_event drivers/usb/core/hub.c:5773 [inline] hub_event+0x2efe/0x5730 drivers/usb/core/hub.c:5855 process_one_work+0x8a9/0x11d0 kernel/workqueue.c:2292 worker_thread+0xa47/0x1200 kernel/workqueue.c:2439 kthread+0x28d/0x320 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 ================================================================ Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 68d57a07bfe5 ("wireless: add plfxlc driver for pureLiFi X, XL, XC devices") Reported-by: syzbot+7d4f142f6c288de8abfe@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7d4f142f6c288de8abfe Signed-off-by: Murad Masimov Link: https://patch.msgid.link/20250321185226.71-2-m.masimov@mt-integration.ru Signed-off-by: Johannes Berg --- drivers/net/wireless/purelifi/plfxlc/mac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/purelifi/plfxlc/mac.c b/drivers/net/wireless/purelifi/plfxlc/mac.c index eae93efa6150..82d1bf7edba2 100644 --- a/drivers/net/wireless/purelifi/plfxlc/mac.c +++ b/drivers/net/wireless/purelifi/plfxlc/mac.c @@ -102,7 +102,6 @@ int plfxlc_mac_init_hw(struct ieee80211_hw *hw) void plfxlc_mac_release(struct plfxlc_mac *mac) { plfxlc_chip_release(&mac->chip); - lockdep_assert_held(&mac->lock); } int plfxlc_op_start(struct ieee80211_hw *hw) From 8e089e7b585d95122c8122d732d1d5ef8f879396 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Tue, 22 Apr 2025 12:22:02 +0800 Subject: [PATCH 111/559] wifi: brcm80211: fmac: Add error handling for brcmf_usb_dl_writeimage() The function brcmf_usb_dl_writeimage() calls the function brcmf_usb_dl_cmd() but dose not check its return value. The 'state.state' and the 'state.bytes' are uninitialized if the function brcmf_usb_dl_cmd() fails. It is dangerous to use uninitialized variables in the conditions. Add error handling for brcmf_usb_dl_cmd() to jump to error handling path if the brcmf_usb_dl_cmd() fails and the 'state.state' and the 'state.bytes' are uninitialized. Improve the error message to report more detailed error information. Fixes: 71bb244ba2fd ("brcm80211: fmac: add USB support for bcm43235/6/8 chipsets") Cc: stable@vger.kernel.org # v3.4+ Signed-off-by: Wentao Liang Acked-by: Arend van Spriel Link: https://patch.msgid.link/20250422042203.2259-1-vulab@iscas.ac.cn Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index 2821c27f317e..d06c724f63d9 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -896,14 +896,16 @@ brcmf_usb_dl_writeimage(struct brcmf_usbdev_info *devinfo, u8 *fw, int fwlen) } /* 1) Prepare USB boot loader for runtime image */ - brcmf_usb_dl_cmd(devinfo, DL_START, &state, sizeof(state)); + err = brcmf_usb_dl_cmd(devinfo, DL_START, &state, sizeof(state)); + if (err) + goto fail; rdlstate = le32_to_cpu(state.state); rdlbytes = le32_to_cpu(state.bytes); /* 2) Check we are in the Waiting state */ if (rdlstate != DL_WAITING) { - brcmf_err("Failed to DL_START\n"); + brcmf_err("Invalid DL state: %u\n", rdlstate); err = -EINVAL; goto fail; } From 175e69e33c66904dfe910c5f43edfe5c95b32f0c Mon Sep 17 00:00:00 2001 From: Itamar Shalev Date: Wed, 23 Apr 2025 12:25:02 +0300 Subject: [PATCH 112/559] wifi: iwlwifi: restore missing initialization of async_handlers_list The initialization of async_handlers_list was accidentally removed in a previous change. This patch restores the missing initialization to ensure proper handler registration. Fixes: 6895d74c11d8 ("wifi: iwlwifi: mld: initialize regulatory early") Signed-off-by: Itamar Shalev Acked-by: Miri Korenblit Link: https://patch.msgid.link/20250423092503.35206-1-itamar.shalev@intel.com Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/mld.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index 4a0842a46a8d..73d2166a4c25 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ -75,6 +75,7 @@ void iwl_construct_mld(struct iwl_mld *mld, struct iwl_trans *trans, /* Setup async RX handling */ spin_lock_init(&mld->async_handlers_lock); + INIT_LIST_HEAD(&mld->async_handlers_list); wiphy_work_init(&mld->async_handlers_wk, iwl_mld_async_handlers_wk); From bbe5679f30d7690a9b6838a583b9690ea73fe0e9 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Tue, 15 Apr 2025 14:19:00 +0200 Subject: [PATCH 113/559] drm/nouveau: Fix WARN_ON in nouveau_fence_context_kill() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nouveau is mostly designed in a way that it's expected that fences only ever get signaled through nouveau_fence_signal(). However, in at least one other place, nouveau_fence_done(), can signal fences, too. If that happens (race) a signaled fence remains in the pending list for a while, until it gets removed by nouveau_fence_update(). Should nouveau_fence_context_kill() run in the meantime, this would be a bug because the function would attempt to set an error code on an already signaled fence. Have nouveau_fence_context_kill() check for a fence being signaled. Cc: stable@vger.kernel.org # v5.10+ Fixes: ea13e5abf807 ("drm/nouveau: signal pending fences when channel has been killed") Suggested-by: Christian König Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250415121900.55719-3-phasta@kernel.org Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 7cc84472cece..edddfc036c6d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -90,7 +90,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) while (!list_empty(&fctx->pending)) { fence = list_entry(fctx->pending.next, typeof(*fence), head); - if (error) + if (error && !dma_fence_is_signaled_locked(&fence->base)) dma_fence_set_error(&fence->base, error); if (nouveau_fence_signal(fence)) From da6d7db8b1620521d093a973a0110898f6585ff9 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 23 Apr 2025 13:57:22 +0800 Subject: [PATCH 114/559] ASoC: soc-acpi-intel-ptl-match: add empty item to ptl_cs42l43_l3[] An empty item is required to terminate the look up loop. Fixes: ac5b4a24f16f ("ASoC: Intel: soc-acpi-intel-ptl-match: Add cs42l43 support") Signed-off-by: Bard Liao Reviewed-by: Naveen Manohar Reviewed-by: Ranjani Sridharan Link: https://patch.msgid.link/20250423055722.6920-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-ptl-match.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index 6603d8de501c..c599eb43eeb1 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -431,7 +431,8 @@ static const struct snd_soc_acpi_link_adr ptl_cs42l43_l3[] = { .mask = BIT(3), .num_adr = ARRAY_SIZE(cs42l43_3_adr), .adr_d = cs42l43_3_adr, - } + }, + {} }; static const struct snd_soc_acpi_link_adr ptl_rt722_only[] = { From bfb713ea53c746b07ae69fe97fa9b5388e4f34f9 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 17 Apr 2025 14:55:50 +0100 Subject: [PATCH 115/559] perf tools: Fix arm64 build by generating unistd_64.h Since pulling in the kernel changes in commit 22f72088ffe6 ("tools headers: Update the syscall table with the kernel sources"), arm64 is no longer using a generic syscall header and generates one from the syscall table. Therefore we must also generate the syscall header for arm64 before building Perf. Add it as a dependency to libperf which uses one syscall number. Perf uses more, but as libperf is a dependency of Perf it will be generated for both. Future platforms that need this will have to add their own syscall-y targets in libperf manually. Unfortunately the arch specific files that do this (e.g. arch/arm64/include/asm/Kbuild) can't easily be imported into the Perf build. But Perf only needs a subset of the generated files anyway, so redefining them is probably the correct thing to do. Fixes: 22f72088ffe6 ("tools headers: Update the syscall table with the kernel sources") Signed-off-by: James Clark Tested-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20250417-james-perf-fix-gen-syscall-v1-1-1d268c923901@linaro.org Signed-off-by: Namhyung Kim --- tools/lib/perf/Makefile | 12 +++++++++++- tools/perf/Makefile.config | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index ffcfd777c451..1a19b5013f45 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -42,6 +42,7 @@ libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) TEST_ARGS := $(if $(V),-v) INCLUDES = \ +-I$(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ @@ -99,7 +100,16 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -$(LIBPERF_IN): FORCE +uapi-asm := $(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi/asm +ifeq ($(SRCARCH),arm64) + syscall-y := $(uapi-asm)/unistd_64.h +endif +uapi-asm-generic: + $(if $(syscall-y),\ + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-headers obj=$(uapi-asm) \ + generic=include/uapi/asm-generic $(syscall-y),) + +$(LIBPERF_IN): uapi-asm-generic FORCE $(Q)$(MAKE) $(build)=libperf $(LIBPERF_A): $(LIBPERF_IN) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index eea95c6c0c71..a52482654d4b 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -29,6 +29,7 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated +CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) From e3f506b78d921e48a00d005bea5c45ec36a99240 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 23 Apr 2025 13:51:54 +0530 Subject: [PATCH 116/559] powerpc/boot: Fix dash warning 'commit b2accfe7ca5b ("powerpc/boot: Check for ld-option support")' suppressed linker warnings, but the expressed used did not go well with POSIX shell (dash) resulting with this warning arch/powerpc/boot/wrapper: 237: [: 0: unexpected operator ld: warning: arch/powerpc/boot/zImage.epapr has a LOAD segment with RWX permissions Fix the check to handle the reported warning. Patch also fixes couple of shellcheck reported errors for the same line. In arch/powerpc/boot/wrapper line 237: if [ $(${CROSS}ld -v --no-warn-rwx-segments &>/dev/null; echo $?) -eq 0 ]; then ^-- SC2046 (warning): Quote this to prevent word splitting. ^------^ SC2086 (info): Double quote to prevent globbing and word splitting. ^---------^ SC3020 (warning): In POSIX sh, &> is undefined. Fixes: b2accfe7ca5b ("powerpc/boot: Check for ld-option support") Reported-by: Stephen Rothwell Suggested-by: Stephen Rothwell Tested-by: Venkat Rao Bagalkote Reviewed-by: Stephen Rothwell Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250423082154.30625-1-maddy@linux.ibm.com --- arch/powerpc/boot/wrapper | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 267ca6d4d9b3..3d8dc822282a 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -234,7 +234,7 @@ fi # suppress some warnings in recent ld versions nowarn="-z noexecstack" -if [ $(${CROSS}ld -v --no-warn-rwx-segments &>/dev/null; echo $?) -eq 0 ]; then +if "${CROSS}ld" -v --no-warn-rwx-segments >/dev/null 2>&1; then nowarn="$nowarn --no-warn-rwx-segments" fi From c73c67026fe65d6677260dfd15dd968b709dc237 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 18 Apr 2025 21:39:02 +0200 Subject: [PATCH 117/559] fanotify: fix flush of mntns marks fanotify_mark(fd, FAN_MARK_FLUSH | FAN_MARK_MNTNS, ...) incorrectly ends up causing removal inode marks. Fixes: 0f46d81f2bce ("fanotify: notify on mount attach and detach") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250418193903.2607617-2-amir73il@gmail.com --- fs/notify/fanotify/fanotify_user.c | 7 +------ include/linux/fsnotify_backend.h | 15 --------------- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index f2d840ae4ded..87f861e9004f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1961,12 +1961,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; if (mark_cmd == FAN_MARK_FLUSH) { - if (mark_type == FAN_MARK_MOUNT) - fsnotify_clear_vfsmount_marks_by_group(group); - else if (mark_type == FAN_MARK_FILESYSTEM) - fsnotify_clear_sb_marks_by_group(group); - else - fsnotify_clear_inode_marks_by_group(group); + fsnotify_clear_marks_by_group(group, obj_type); return 0; } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 6cd8d1d28b8b..fc27b53c58c2 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -907,21 +907,6 @@ extern void fsnotify_wait_marks_destroyed(void); /* Clear all of the marks of a group attached to a given object type */ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int obj_type); -/* run all the marks in a group, and clear all of the vfsmount marks */ -static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); -} -/* run all the marks in a group, and clear all of the inode marks */ -static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); -} -/* run all the marks in a group, and clear all of the sn marks */ -static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB); -} extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); From cd188e9ef80fd005fd8c8de34ed649bd653d00e5 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 18 Apr 2025 21:39:03 +0200 Subject: [PATCH 118/559] selftests/fs/mount-notify: test also remove/flush of mntns marks Regression test for FAN_MARK_MNTFS | FAN_MARK_FLUSH bug. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250418193903.2607617-3-amir73il@gmail.com --- .../mount-notify/mount-notify_test.c | 57 +++++++++++++++---- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c index 4a2d5c454fd1..59a71f22fb11 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -48,8 +48,16 @@ static uint64_t get_mnt_id(struct __test_metadata *const _metadata, static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; +static const int mark_cmds[] = { + FAN_MARK_ADD, + FAN_MARK_REMOVE, + FAN_MARK_FLUSH +}; + +#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds) + FIXTURE(fanotify) { - int fan_fd; + int fan_fd[NUM_FAN_FDS]; char buf[256]; unsigned int rem; void *next; @@ -61,7 +69,7 @@ FIXTURE(fanotify) { FIXTURE_SETUP(fanotify) { - int ret; + int i, ret; ASSERT_EQ(unshare(CLONE_NEWNS), 0); @@ -89,20 +97,34 @@ FIXTURE_SETUP(fanotify) self->root_id = get_mnt_id(_metadata, "/"); ASSERT_NE(self->root_id, 0); - self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0); - ASSERT_GE(self->fan_fd, 0); - - ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS, - FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL); - ASSERT_EQ(ret, 0); + for (i = 0; i < NUM_FAN_FDS; i++) { + self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK, + 0); + ASSERT_GE(self->fan_fd[i], 0); + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + // On fd[0] we do an extra ADD that changes nothing. + // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark. + ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + } self->rem = 0; } FIXTURE_TEARDOWN(fanotify) { + int i; + ASSERT_EQ(self->rem, 0); - close(self->fan_fd); + for (i = 0; i < NUM_FAN_FDS; i++) + close(self->fan_fd[i]); ASSERT_EQ(fchdir(self->orig_root), 0); @@ -123,8 +145,21 @@ static uint64_t expect_notify(struct __test_metadata *const _metadata, unsigned int thislen; if (!self->rem) { - ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf)); - ASSERT_GT(len, 0); + ssize_t len; + int i; + + for (i = NUM_FAN_FDS - 1; i >= 0; i--) { + len = read(self->fan_fd[i], self->buf, + sizeof(self->buf)); + if (i > 0) { + // Groups 1,2 should get EAGAIN + ASSERT_EQ(len, -1); + ASSERT_EQ(errno, EAGAIN); + } else { + // Group 0 should get events + ASSERT_GT(len, 0); + } + } self->rem = len; self->next = (void *) self->buf; From c1b0f5183a4488b6b7790f834ce3a786725b3583 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 10 Apr 2025 17:15:25 +0300 Subject: [PATCH 119/559] ASoC: renesas: rz-ssi: Use NOIRQ_SYSTEM_SLEEP_PM_OPS() In the latest kernel versions system crashes were noticed occasionally during suspend/resume. This occurs because the RZ SSI suspend trigger (called from snd_soc_suspend()) is executed after rz_ssi_pm_ops->suspend() and it accesses IP registers. After the rz_ssi_pm_ops->suspend() is executed the IP clocks are disabled and its reset line is asserted. Since snd_soc_suspend() is invoked through snd_soc_pm_ops->suspend(), snd_soc_pm_ops is associated with soc_driver (defined in sound/soc/soc-core.c), and there is no parent-child relationship between soc_driver and rz_ssi_driver the power management subsystem does not enforce a specific suspend/resume order between the RZ SSI platform driver and soc_driver. To ensure that the suspend/resume function of rz-ssi is executed after snd_soc_suspend(), use NOIRQ_SYSTEM_SLEEP_PM_OPS(). Fixes: 1fc778f7c833 ("ASoC: renesas: rz-ssi: Add suspend to RAM support") Cc: stable@vger.kernel.org Signed-off-by: Claudiu Beznea Link: https://patch.msgid.link/20250410141525.4126502-1-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Mark Brown --- sound/soc/renesas/rz-ssi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/renesas/rz-ssi.c b/sound/soc/renesas/rz-ssi.c index 3a0af4ca7ab6..0f7458a43901 100644 --- a/sound/soc/renesas/rz-ssi.c +++ b/sound/soc/renesas/rz-ssi.c @@ -1244,7 +1244,7 @@ static int rz_ssi_runtime_resume(struct device *dev) static const struct dev_pm_ops rz_ssi_pm_ops = { RUNTIME_PM_OPS(rz_ssi_runtime_suspend, rz_ssi_runtime_resume, NULL) - SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) + NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) }; static struct platform_driver rz_ssi_driver = { From 722a6ad4867ce8c4cb131a3371d0b5389a75dee0 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Wed, 23 Apr 2025 21:31:57 +0200 Subject: [PATCH 120/559] spi: spi-qpic-snand: propagate errors from qcom_spi_block_erase() The qcom_spi_block_erase() function returns with error in case of failure. Change the qcom_spi_send_cmdaddr() function to propagate these errors to the callers instead of returning with success. Fixes: 7304d1909080 ("spi: spi-qpic: add driver for QCOM SPI NAND flash Interface") Signed-off-by: Gabor Juhos Reviewed-by: Abel Vesa Link: https://patch.msgid.link/20250423-qpic-snand-propagate-error-v1-1-4b26ed45fdb5@gmail.com Reviewed-by: Md Sadre Alam Signed-off-by: Mark Brown --- drivers/spi/spi-qpic-snand.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 17eb67e19132..ae32c452d0bc 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -1307,8 +1307,7 @@ static int qcom_spi_send_cmdaddr(struct qcom_nand_controller *snandc, snandc->qspi->addr1 = cpu_to_le32(s_op.addr1_reg << 16); snandc->qspi->addr2 = cpu_to_le32(s_op.addr2_reg); snandc->qspi->cmd = cpu_to_le32(cmd); - qcom_spi_block_erase(snandc); - return 0; + return qcom_spi_block_erase(snandc); default: break; } From be8250786ca94952a19ce87f98ad9906448bc9ef Mon Sep 17 00:00:00 2001 From: Zhenhua Huang Date: Mon, 21 Apr 2025 15:52:32 +0800 Subject: [PATCH 121/559] mm, slab: clean up slab->obj_exts always When memory allocation profiling is disabled at runtime or due to an error, shutdown_mem_profiling() is called: slab->obj_exts which previously allocated remains. It won't be cleared by unaccount_slab() because of mem_alloc_profiling_enabled() not true. It's incorrect, slab->obj_exts should always be cleaned up in unaccount_slab() to avoid following error: [...]BUG: Bad page state in process... .. [...]page dumped because: page still charged to cgroup [andriy.shevchenko@linux.intel.com: fold need_slab_obj_ext() into its only user] Fixes: 21c690a349ba ("mm: introduce slabobj_ext to support slab object extensions") Cc: stable@vger.kernel.org Signed-off-by: Zhenhua Huang Acked-by: David Rientjes Acked-by: Harry Yoo Tested-by: Harry Yoo Acked-by: Suren Baghdasaryan Link: https://patch.msgid.link/20250421075232.2165527-1-quic_zhenhuah@quicinc.com Signed-off-by: Vlastimil Babka --- mm/slub.c | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index dc9e729e1d26..be8b09e09d30 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2028,8 +2028,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, return 0; } -/* Should be called only if mem_alloc_profiling_enabled() */ -static noinline void free_slab_obj_exts(struct slab *slab) +static inline void free_slab_obj_exts(struct slab *slab) { struct slabobj_ext *obj_exts; @@ -2049,18 +2048,6 @@ static noinline void free_slab_obj_exts(struct slab *slab) slab->obj_exts = 0; } -static inline bool need_slab_obj_ext(void) -{ - if (mem_alloc_profiling_enabled()) - return true; - - /* - * CONFIG_MEMCG creates vector of obj_cgroup objects conditionally - * inside memcg_slab_post_alloc_hook. No other users for now. - */ - return false; -} - #else /* CONFIG_SLAB_OBJ_EXT */ static inline void init_slab_obj_exts(struct slab *slab) @@ -2077,11 +2064,6 @@ static inline void free_slab_obj_exts(struct slab *slab) { } -static inline bool need_slab_obj_ext(void) -{ - return false; -} - #endif /* CONFIG_SLAB_OBJ_EXT */ #ifdef CONFIG_MEM_ALLOC_PROFILING @@ -2129,7 +2111,7 @@ __alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) static inline void alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) { - if (need_slab_obj_ext()) + if (mem_alloc_profiling_enabled()) __alloc_tagging_slab_alloc_hook(s, object, flags); } @@ -2601,8 +2583,12 @@ static __always_inline void account_slab(struct slab *slab, int order, static __always_inline void unaccount_slab(struct slab *slab, int order, struct kmem_cache *s) { - if (memcg_kmem_online() || need_slab_obj_ext()) - free_slab_obj_exts(slab); + /* + * The slab object extensions should now be freed regardless of + * whether mem_alloc_profiling_enabled() or not because profiling + * might have been disabled after slab->obj_exts got allocated. + */ + free_slab_obj_exts(slab); mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), -(PAGE_SIZE << order)); From 087a9eb9e5978e3ba362e1163691e41097e8ca20 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 23 Apr 2025 17:51:31 +0300 Subject: [PATCH 122/559] vxlan: vnifilter: Fix unlocked deletion of default FDB entry When a VNI is deleted from a VXLAN device in 'vnifilter' mode, the FDB entry associated with the default remote (assuming one was configured) is deleted without holding the hash lock. This is wrong and will result in a warning [1] being generated by the lockdep annotation that was added by commit ebe642067455 ("vxlan: Create wrappers for FDB lookup"). Reproducer: # ip link add vx0 up type vxlan dstport 4789 external vnifilter local 192.0.2.1 # bridge vni add vni 10010 remote 198.51.100.1 dev vx0 # bridge vni del vni 10010 dev vx0 Fix by acquiring the hash lock before the deletion and releasing it afterwards. Blame the original commit that introduced the issue rather than the one that exposed it. [1] WARNING: CPU: 3 PID: 392 at drivers/net/vxlan/vxlan_core.c:417 vxlan_find_mac+0x17f/0x1a0 [...] RIP: 0010:vxlan_find_mac+0x17f/0x1a0 [...] Call Trace: __vxlan_fdb_delete+0xbe/0x560 vxlan_vni_delete_group+0x2ba/0x940 vxlan_vni_del.isra.0+0x15f/0x580 vxlan_process_vni_filter+0x38b/0x7b0 vxlan_vnifilter_process+0x3bb/0x510 rtnetlink_rcv_msg+0x2f7/0xb70 netlink_rcv_skb+0x131/0x360 netlink_unicast+0x426/0x710 netlink_sendmsg+0x75a/0xc20 __sock_sendmsg+0xc1/0x150 ____sys_sendmsg+0x5aa/0x7b0 ___sys_sendmsg+0xfc/0x180 __sys_sendmsg+0x121/0x1b0 do_syscall_64+0xbb/0x1d0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device") Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250423145131.513029-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_vnifilter.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c index 6e6e9f05509a..06d19e90eadb 100644 --- a/drivers/net/vxlan/vxlan_vnifilter.c +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -627,7 +627,11 @@ static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, * default dst remote_ip previously added for this vni */ if (!vxlan_addr_any(&vninode->remote_ip) || - !vxlan_addr_any(&dst->remote_ip)) + !vxlan_addr_any(&dst->remote_ip)) { + u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, + vninode->vni); + + spin_lock_bh(&vxlan->hash_lock[hash_index]); __vxlan_fdb_delete(vxlan, all_zeros_mac, (vxlan_addr_any(&vninode->remote_ip) ? dst->remote_ip : vninode->remote_ip), @@ -635,6 +639,8 @@ static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, vninode->vni, vninode->vni, dst->remote_ifindex, true); + spin_unlock_bh(&vxlan->hash_lock[hash_index]); + } if (vxlan->dev->flags & IFF_UP) { if (vxlan_addr_multicast(&vninode->remote_ip) && From a2620f8932fa9fdabc3d78ed6efb004ca409019f Mon Sep 17 00:00:00 2001 From: Mikhail Lobanov Date: Mon, 14 Apr 2025 20:12:06 +0300 Subject: [PATCH 123/559] KVM: SVM: Forcibly leave SMM mode on SHUTDOWN interception Previously, commit ed129ec9057f ("KVM: x86: forcibly leave nested mode on vCPU reset") addressed an issue where a triple fault occurring in nested mode could lead to use-after-free scenarios. However, the commit did not handle the analogous situation for System Management Mode (SMM). This omission results in triggering a WARN when KVM forces a vCPU INIT after SHUTDOWN interception while the vCPU is in SMM. This situation was reprodused using Syzkaller by: 1) Creating a KVM VM and vCPU 2) Sending a KVM_SMI ioctl to explicitly enter SMM 3) Executing invalid instructions causing consecutive exceptions and eventually a triple fault The issue manifests as follows: WARNING: CPU: 0 PID: 25506 at arch/x86/kvm/x86.c:12112 kvm_vcpu_reset+0x1d2/0x1530 arch/x86/kvm/x86.c:12112 Modules linked in: CPU: 0 PID: 25506 Comm: syz-executor.0 Not tainted 6.1.130-syzkaller-00157-g164fe5dde9b6 #0 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 RIP: 0010:kvm_vcpu_reset+0x1d2/0x1530 arch/x86/kvm/x86.c:12112 Call Trace: shutdown_interception+0x66/0xb0 arch/x86/kvm/svm/svm.c:2136 svm_invoke_exit_handler+0x110/0x530 arch/x86/kvm/svm/svm.c:3395 svm_handle_exit+0x424/0x920 arch/x86/kvm/svm/svm.c:3457 vcpu_enter_guest arch/x86/kvm/x86.c:10959 [inline] vcpu_run+0x2c43/0x5a90 arch/x86/kvm/x86.c:11062 kvm_arch_vcpu_ioctl_run+0x50f/0x1cf0 arch/x86/kvm/x86.c:11283 kvm_vcpu_ioctl+0x570/0xf00 arch/x86/kvm/../../../virt/kvm/kvm_main.c:4122 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __x64_sys_ioctl+0x19a/0x210 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x35/0x80 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Architecturally, INIT is blocked when the CPU is in SMM, hence KVM's WARN() in kvm_vcpu_reset() to guard against KVM bugs, e.g. to detect improper emulation of INIT. SHUTDOWN on SVM is a weird edge case where KVM needs to do _something_ sane with the VMCB, since it's technically undefined, and INIT is the least awful choice given KVM's ABI. So, double down on stuffing INIT on SHUTDOWN, and force the vCPU out of SMM to avoid any weirdness (and the WARN). Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: ed129ec9057f ("KVM: x86: forcibly leave nested mode on vCPU reset") Cc: stable@vger.kernel.org Suggested-by: Sean Christopherson Signed-off-by: Mikhail Lobanov Link: https://lore.kernel.org/r/20250414171207.155121-1-m.lobanov@rosa.ru [sean: massage changelog, make it clear this isn't architectural behavior] Signed-off-by: Sean Christopherson --- arch/x86/kvm/smm.c | 1 + arch/x86/kvm/svm/svm.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index 699e551ec93b..9864c057187d 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -131,6 +131,7 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm) kvm_mmu_reset_context(vcpu); } +EXPORT_SYMBOL_GPL(kvm_smm_changed); void process_smi(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d5d0c5c3300b..c5470d842aed 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2231,6 +2231,10 @@ static int shutdown_interception(struct kvm_vcpu *vcpu) */ if (!sev_es_guest(vcpu->kvm)) { clear_page(svm->vmcb); +#ifdef CONFIG_KVM_SMM + if (is_smm(vcpu)) + kvm_smm_changed(vcpu, false); +#endif kvm_vcpu_reset(vcpu, true); } From a476cadf8ef1fbb9780581316f0199dfc62a81f2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 24 Mar 2025 13:51:28 +0300 Subject: [PATCH 124/559] KVM: x86: Check that the high 32bits are clear in kvm_arch_vcpu_ioctl_run() The "kvm_run->kvm_valid_regs" and "kvm_run->kvm_dirty_regs" variables are u64 type. We are only using the lowest 3 bits but we want to ensure that the users are not passing invalid bits so that we can use the remaining bits in the future. However "sync_valid_fields" and kvm_sync_valid_fields() are u32 type so the check only ensures that the lower 32 bits are clear. Fix this by changing the types to u64. Fixes: 74c1807f6c4f ("KVM: x86: block KVM_CAP_SYNC_REGS if guest state is protected") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/ec25aad1-113e-4c6e-8941-43d432251398@stanley.mountain Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index df5b99ea1f18..9896fd574bfc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4597,7 +4597,7 @@ static bool kvm_is_vm_type_supported(unsigned long type) return type < 32 && (kvm_caps.supported_vm_types & BIT(type)); } -static inline u32 kvm_sync_valid_fields(struct kvm *kvm) +static inline u64 kvm_sync_valid_fields(struct kvm *kvm) { return kvm && kvm->arch.has_protected_state ? 0 : KVM_SYNC_X86_VALID_FIELDS; } @@ -11493,7 +11493,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { struct kvm_queued_exception *ex = &vcpu->arch.exception; struct kvm_run *kvm_run = vcpu->run; - u32 sync_valid_fields; + u64 sync_valid_fields; int r; r = kvm_mmu_post_init_vm(vcpu->kvm); From a1356ac7749cafc4e27aa62c0c4604b5dca4983e Mon Sep 17 00:00:00 2001 From: "e.kubanski" Date: Wed, 16 Apr 2025 12:19:08 +0200 Subject: [PATCH 125/559] xsk: Fix race condition in AF_XDP generic RX path Move rx_lock from xsk_socket to xsk_buff_pool. Fix synchronization for shared umem mode in generic RX path where multiple sockets share single xsk_buff_pool. RX queue is exclusive to xsk_socket, while FILL queue can be shared between multiple sockets. This could result in race condition where two CPU cores access RX path of two different sockets sharing the same umem. Protect both queues by acquiring spinlock in shared xsk_buff_pool. Lock contention may be minimized in the future by some per-thread FQ buffering. It's safe and necessary to move spin_lock_bh(rx_lock) after xsk_rcv_check(): * xs->pool and spinlock_init is synchronized by xsk_bind() -> xsk_is_bound() memory barriers. * xsk_rcv_check() may return true at the moment of xsk_release() or xsk_unbind_dev(), however this will not cause any data races or race conditions. xsk_unbind_dev() removes xdp socket from all maps and waits for completion of all outstanding rx operations. Packets in RX path will either complete safely or drop. Signed-off-by: Eryk Kubanski Fixes: bf0bdd1343efb ("xdp: fix race on generic receive path") Acked-by: Magnus Karlsson Link: https://patch.msgid.link/20250416101908.10919-1-e.kubanski@partner.samsung.com Signed-off-by: Jakub Kicinski --- include/net/xdp_sock.h | 3 --- include/net/xsk_buff_pool.h | 2 ++ net/xdp/xsk.c | 6 +++--- net/xdp/xsk_buff_pool.c | 1 + 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index a58ae7589d12..e8bd6ddb7b12 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -71,9 +71,6 @@ struct xdp_sock { */ u32 tx_budget_spent; - /* Protects generic receive. */ - spinlock_t rx_lock; - /* Statistics */ u64 rx_dropped; u64 rx_queue_full; diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 1dcd4d71468a..3b243ea70e38 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -53,6 +53,8 @@ struct xsk_buff_pool { refcount_t users; struct xdp_umem *umem; struct work_struct work; + /* Protects generic receive in shared and non-shared umem mode. */ + spinlock_t rx_lock; struct list_head free_list; struct list_head xskb_list; u32 heads_cnt; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 5696af45bcf7..4abc81f33d3e 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -338,13 +338,14 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) u32 len = xdp_get_buff_len(xdp); int err; - spin_lock_bh(&xs->rx_lock); err = xsk_rcv_check(xs, xdp, len); if (!err) { + spin_lock_bh(&xs->pool->rx_lock); err = __xsk_rcv(xs, xdp, len); xsk_flush(xs); + spin_unlock_bh(&xs->pool->rx_lock); } - spin_unlock_bh(&xs->rx_lock); + return err; } @@ -1734,7 +1735,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, xs = xdp_sk(sk); xs->state = XSK_READY; mutex_init(&xs->mutex); - spin_lock_init(&xs->rx_lock); INIT_LIST_HEAD(&xs->map_list); spin_lock_init(&xs->map_list_lock); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 25a76c5ce0f1..c5181a9044ad 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -89,6 +89,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, pool->addrs = umem->addrs; pool->tx_metadata_len = umem->tx_metadata_len; pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM; + spin_lock_init(&pool->rx_lock); INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xsk_tx_list); From bf20af07909925ec0ae6cd4f3b7be0279dfa8768 Mon Sep 17 00:00:00 2001 From: "e.kubanski" Date: Wed, 16 Apr 2025 13:29:25 +0200 Subject: [PATCH 126/559] xsk: Fix offset calculation in unaligned mode Bring back previous offset calculation behaviour in AF_XDP unaligned umem mode. In unaligned mode, upper 16 bits should contain data offset, lower 48 bits should contain only specific chunk location without offset. Remove pool->headroom duplication into 48bit address. Signed-off-by: Eryk Kubanski Fixes: bea14124bacb ("xsk: Get rid of xdp_buff_xsk::orig_addr") Acked-by: Magnus Karlsson Link: https://patch.msgid.link/20250416112925.7501-1-e.kubanski@partner.samsung.com Signed-off-by: Jakub Kicinski --- include/net/xsk_buff_pool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 3b243ea70e38..cac56e6b0869 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -240,8 +240,8 @@ static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb, return orig_addr; offset = xskb->xdp.data - xskb->xdp.data_hard_start; - orig_addr -= offset; offset += pool->headroom; + orig_addr -= offset; return orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); } From eacc77a73275895eca0e3655dc6c671853500e2e Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Wed, 23 Apr 2025 11:36:07 +0300 Subject: [PATCH 127/559] net/mlx5e: Use custom tunnel header for vxlan gbp Symbolic (e.g. "vxlan") and custom (e.g. "tunnel_header_0") tunnels cannot be combined, but the match params interface does not have fields for matching on vxlan gbp. To match vxlan bgp, the tc_tun layer uses tunnel_header_0. Allow matching on both VNI and GBP by matching the VNI with a custom tunnel header instead of the symbolic field name. Matching solely on the VNI continues to use the symbolic field name. Fixes: 74a778b4a63f ("net/mlx5: HWS, added definers handling") Signed-off-by: Vlad Dogaru Reviewed-by: Yevgeny Kliteynik Signed-off-by: Mark Bloch Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250423083611.324567-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en/tc_tun_vxlan.c | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index 5c762a71818d..7a18a469961d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -165,9 +165,6 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, struct flow_match_enc_keyid enc_keyid; void *misc_c, *misc_v; - misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) return 0; @@ -182,6 +179,30 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, err = mlx5e_tc_tun_parse_vxlan_gbp_option(priv, spec, f); if (err) return err; + + /* We can't mix custom tunnel headers with symbolic ones and we + * don't have a symbolic field name for GBP, so we use custom + * tunnel headers in this case. We need hardware support to + * match on custom tunnel headers, but we already know it's + * supported because the previous call successfully checked for + * that. + */ + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_5); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_5); + + /* Shift by 8 to account for the reserved bits in the vxlan + * header after the VNI. + */ + MLX5_SET(fte_match_set_misc5, misc_c, tunnel_header_1, + be32_to_cpu(enc_keyid.mask->keyid) << 8); + MLX5_SET(fte_match_set_misc5, misc_v, tunnel_header_1, + be32_to_cpu(enc_keyid.key->keyid) << 8); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; + + return 0; } /* match on VNI is required */ @@ -195,6 +216,11 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, be32_to_cpu(enc_keyid.mask->keyid)); MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, From 5d1a04f347e6cbf5ffe74da409a5d71fbe8c5f19 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 23 Apr 2025 11:36:08 +0300 Subject: [PATCH 128/559] net/mlx5: E-Switch, Initialize MAC Address for Default GID Initialize the source MAC address when creating the default GID entry. Since this entry is used only for loopback traffic, it only needs to be a unicast address. A zeroed-out MAC address is sufficient for this purpose. Without this fix, random bits would be assigned as the source address. If these bits formed a multicast address, the firmware would return an error, preventing the user from switching to switchdev mode: Error: mlx5_core: Failed setting eswitch to offloads. kernel answers: Invalid argument Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic") Signed-off-by: Maor Gottlieb Signed-off-by: Mark Bloch Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250423083611.324567-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index a42f6cd99b74..f585ef5a3424 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -118,8 +118,8 @@ static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid * static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) { + u8 mac[ETH_ALEN] = {}; union ib_gid gid; - u8 mac[ETH_ALEN]; mlx5_rdma_make_default_gid(dev, &gid); return mlx5_core_roce_gid_set(dev, 0, From 172c034264c894518c012387f2de2f9d6443505d Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Wed, 23 Apr 2025 11:36:09 +0300 Subject: [PATCH 129/559] net/mlx5e: TC, Continue the attr process even if encap entry is invalid Previously the offload of the rule with header rewrite and mirror to both internal and external destinations is skipped if the encap entry is not valid. But it shouldn't because driver will try to offload it again if neighbor is updated and encap entry is valid, to replace the old FTE added for slow path. But the extra split attr doesn't exist at that time as the process is skipped, driver then fails to offload it. To fix this issue, remove the checking and continue the attr process if encap entry is invalid. Fixes: b11bde56246e ("net/mlx5e: TC, Offload rewrite and mirror to both internal and external dests") Signed-off-by: Jianbo Liu Reviewed-by: Cosmin Ratiu Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250423083611.324567-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9ba99609999f..f1d908f61134 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1750,9 +1750,6 @@ extra_split_attr_dests_needed(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr !list_is_first(&attr->list, &flow->attrs)) return 0; - if (flow_flag_test(flow, SLOW)) - return 0; - esw_attr = attr->esw_attr; if (!esw_attr->split_count || esw_attr->split_count == esw_attr->out_count - 1) @@ -1766,7 +1763,7 @@ extra_split_attr_dests_needed(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr for (i = esw_attr->split_count; i < esw_attr->out_count; i++) { /* external dest with encap is considered as internal by firmware */ if (esw_attr->dests[i].vport == MLX5_VPORT_UPLINK && - !(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) + !(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)) ext_dest = true; else int_dest = true; From 1c2940ec0ddf51c689ee9ab85ead85c11b77809d Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Wed, 23 Apr 2025 11:36:10 +0300 Subject: [PATCH 130/559] net/mlx5e: Fix lock order in mlx5e_tx_reporter_ptpsq_unhealthy_recover RTNL needs to be acquired before state_lock. Fixes: fdce06bda7e5 ("net/mlx5e: Acquire RTNL lock before RQs/SQs activation/deactivation") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250423083611.324567-5-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 532c7fa94d17..dbd9482359e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -176,6 +176,7 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx) priv = ptpsq->txqsq.priv; + rtnl_lock(); mutex_lock(&priv->state_lock); chs = &priv->channels; netdev = priv->netdev; @@ -183,22 +184,19 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx) carrier_ok = netif_carrier_ok(netdev); netif_carrier_off(netdev); - rtnl_lock(); mlx5e_deactivate_priv_channels(priv); - rtnl_unlock(); mlx5e_ptp_close(chs->ptp); err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); - rtnl_lock(); mlx5e_activate_priv_channels(priv); - rtnl_unlock(); /* return carrier back if needed */ if (carrier_ok) netif_carrier_on(netdev); mutex_unlock(&priv->state_lock); + rtnl_unlock(); return err; } From 90538d23278a981e344d364e923162fce752afeb Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 23 Apr 2025 11:36:11 +0300 Subject: [PATCH 131/559] net/mlx5: E-switch, Fix error handling for enabling roce The cited commit assumes enabling roce always succeeds. But it is not true. Add error handling for it. Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Reviewed-by: Maor Gottlieb Signed-off-by: Mark Bloch Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250423083611.324567-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 5 ++++- drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 9 +++++---- drivers/net/ethernet/mellanox/mlx5/core/rdma.h | 4 ++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a6a8eea5980c..0e3a977d5332 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3533,7 +3533,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) int err; mutex_init(&esw->offloads.termtbl_mutex); - mlx5_rdma_enable_roce(esw->dev); + err = mlx5_rdma_enable_roce(esw->dev); + if (err) + goto err_roce; err = mlx5_esw_host_number_init(esw); if (err) @@ -3594,6 +3596,7 @@ err_vport_metadata: esw_offloads_metadata_uninit(esw); err_metadata: mlx5_rdma_disable_roce(esw->dev); +err_roce: mutex_destroy(&esw->offloads.termtbl_mutex); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index f585ef5a3424..5c552b71e371 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -140,17 +140,17 @@ void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) mlx5_nic_vport_disable_roce(dev); } -void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { int err; if (!MLX5_CAP_GEN(dev, roce)) - return; + return 0; err = mlx5_nic_vport_enable_roce(dev); if (err) { mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err); - return; + return err; } err = mlx5_rdma_add_roce_addr(dev); @@ -165,10 +165,11 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) goto del_roce_addr; } - return; + return err; del_roce_addr: mlx5_rdma_del_roce_addr(dev); disable_roce: mlx5_nic_vport_disable_roce(dev); + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h index 750cff2a71a4..3d9e76c3d42f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h @@ -8,12 +8,12 @@ #ifdef CONFIG_MLX5_ESWITCH -void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ -static inline void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) {} +static inline int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {} #endif /* CONFIG_MLX5_ESWITCH */ From 1526a735a7620db8b22ea3d24d3ba7ac262b2aaf Mon Sep 17 00:00:00 2001 From: Michael Riesch Date: Thu, 10 Apr 2025 21:41:30 +0200 Subject: [PATCH 132/559] MAINTAINERS: add exclude for dt-bindings to imx entry Since the IMX (as in i.MX, the NXP SoCs) MAINTAINERS entry claims everything that contains the name "imx", hanges to device tree bindings for any Sony IMX image sensor are likely to be sent to the maintainers listed therein. Add the missing exclude to fix that. Fixes: da8b7f0fb02b ("MAINTAINERS: add all files matching "imx" and "mxs" to the IMX entry") Suggested-by: Sebastian Reichel Signed-off-by: Michael Riesch Reviewed-by: Sakari Ailus Signed-off-by: Shawn Guo --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 96b827049501..c87b26eada7b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2519,6 +2519,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git F: arch/arm/boot/dts/nxp/imx/ F: arch/arm/boot/dts/nxp/mxs/ F: arch/arm64/boot/dts/freescale/ +X: Documentation/devicetree/bindings/media/i2c/ X: arch/arm64/boot/dts/freescale/fsl-* X: arch/arm64/boot/dts/freescale/qoriq-* X: drivers/media/i2c/ From a32f1923c6d6e9e727d00558a15ec0af6639de19 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 24 Apr 2025 22:15:50 +0200 Subject: [PATCH 133/559] crypto: scompress - increment scomp_scratch_users when already allocated Commit ddd0a42671c0 only increments scomp_scratch_users when it was 0, causing a panic when using ipcomp: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 1 UID: 0 PID: 619 Comm: ping Tainted: G N 6.15.0-rc3-net-00032-ga79be02bba5c #41 PREEMPT(full) Tainted: [N]=TEST Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014 RIP: 0010:inflate_fast+0x5a2/0x1b90 [...] Call Trace: zlib_inflate+0x2d60/0x6620 deflate_sdecompress+0x166/0x350 scomp_acomp_comp_decomp+0x45f/0xa10 scomp_acomp_decompress+0x21/0x120 acomp_do_req_chain+0x3e5/0x4e0 ipcomp_input+0x212/0x550 xfrm_input+0x2de2/0x72f0 [...] Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Instead, let's keep the old increment, and decrement back to 0 if the scratch allocation fails. Fixes: ddd0a42671c0 ("crypto: scompress - Fix scratch allocation failure handling") Signed-off-by: Sabrina Dubroca Signed-off-by: Herbert Xu --- crypto/scompress.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/scompress.c b/crypto/scompress.c index 36934c78d127..ffeedcf20b0f 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -163,11 +163,10 @@ static int crypto_scomp_init_tfm(struct crypto_tfm *tfm) if (ret) goto unlock; } - if (!scomp_scratch_users) { + if (!scomp_scratch_users++) { ret = crypto_scomp_alloc_scratches(); if (ret) - goto unlock; - scomp_scratch_users++; + scomp_scratch_users--; } unlock: mutex_unlock(&scomp_lock); From 9bbb8a07fd65fca0f29a869ec3f2435761a6c676 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Mon, 2 Dec 2024 11:19:55 +0100 Subject: [PATCH 134/559] tools/hv: update route parsing in kvp daemon After recent changes in the VM network stack, the host fails to display the IP addresses of the VM. As a result the "IP Addresses" column in the "Networking" tab in the Windows Hyper-V Manager is empty. This is caused by a change in the expected output of the "ip route show" command. Previously the gateway address was shown in the third row. Now the gateway addresses might be split into several lines of output. As a result, the string "ra" instead of an IP address is sent to the host. To me more specific, a VM with the wellknown wicked network managing tool still shows the expected output in recent openSUSE Tumbleweed snapshots: ip a show dev uplink;ip -4 route show;ip -6 route show 2: uplink: mtu 1500 qdisc mq state ... link/ether 00:15:5d:d0:93:08 brd ff:ff:ff:ff:ff:ff inet 1.2.3.4/22 brd 1.2.3.255 scope global uplink valid_lft forever preferred_lft forever inet6 fe80::215:5dff:fed0:9308/64 scope link proto kernel_ll valid_lft forever preferred_lft forever default via 1.2.3.254 dev uplink proto dhcp 1.2.3.0/22 dev uplink proto kernel scope link src 1.2.3.4 fe80::/64 dev uplink proto kernel metric 256 pref medium default via fe80::26fc:4e00:3b:74 dev uplink proto ra metric 1024 exp... default via fe80::6a22:8e00:fb:14f8 dev uplink proto ra metric 1024 e... A similar VM, but with NetworkManager as network managing tool: ip a show dev eth0;ip -4 route show;ip -6 route show 2: eth0: mtu 1500 qdisc mq state UP... link/ether 00:15:5d:d0:93:0b brd ff:ff:ff:ff:ff:ff inet 1.2.3.8/22 brd 1.2.3.255 scope global dynamic noprefixroute ... valid_lft 1022sec preferred_lft 1022sec inet6 fe80::215:5dff:fed0:930b/64 scope link noprefixroute valid_lft forever preferred_lft forever default via 1.2.3.254 dev eth0 proto dhcp src 1.2.3.8 metric 100 1.2.3.0/22 dev eth0 proto kernel scope link src 1.2.3.8 metric 100 fe80::/64 dev eth0 proto kernel metric 1024 pref medium default proto ra metric 20100 pref medium nexthop via fe80::6a22:8e00:fb:14f8 dev eth0 weight 1 nexthop via fe80::26fc:4e00:3b:74 dev eth0 weight 1 Adjust the route parsing to use a single line for each line of output. Also use a single shell invocation to retrieve both IPv4 and IPv6 information. The actual IP addresses are expected after the "via" keyword. Signed-off-by: Olaf Hering Reviewed-by: Shradha Gupta Link: https://lore.kernel.org/r/20241202102235.9701-1-olaf@aepfle.de Signed-off-by: Wei Liu Message-ID: <20241202102235.9701-1-olaf@aepfle.de> --- tools/hv/hv_kvp_daemon.c | 108 ++++++++++++++++++++++++++++++--------- 1 file changed, 84 insertions(+), 24 deletions(-) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 04ba035d67e9..b9ce3aab15fe 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -677,6 +678,88 @@ static void kvp_process_ipconfig_file(char *cmd, pclose(file); } +static bool kvp_verify_ip_address(const void *address_string) +{ + char verify_buf[sizeof(struct in6_addr)]; + + if (inet_pton(AF_INET, address_string, verify_buf) == 1) + return true; + if (inet_pton(AF_INET6, address_string, verify_buf) == 1) + return true; + return false; +} + +static void kvp_extract_routes(const char *line, void **output, size_t *remaining) +{ + static const char needle[] = "via "; + const char *match, *haystack = line; + + while ((match = strstr(haystack, needle))) { + const char *address, *next_char; + + /* Address starts after needle. */ + address = match + strlen(needle); + + /* The char following address is a space or end of line. */ + next_char = strpbrk(address, " \t\\"); + if (!next_char) + next_char = address + strlen(address) + 1; + + /* Enough room for address and semicolon. */ + if (*remaining >= (next_char - address) + 1) { + memcpy(*output, address, next_char - address); + /* Terminate string for verification. */ + memcpy(*output + (next_char - address), "", 1); + if (kvp_verify_ip_address(*output)) { + /* Advance output buffer. */ + *output += next_char - address; + *remaining -= next_char - address; + + /* Each address needs a trailing semicolon. */ + memcpy(*output, ";", 1); + *output += 1; + *remaining -= 1; + } + } + haystack = next_char; + } +} + +static void kvp_get_gateway(void *buffer, size_t buffer_len) +{ + static const char needle[] = "default "; + FILE *f; + void *output = buffer; + char *line = NULL; + size_t alloc_size = 0, remaining = buffer_len - 1; + ssize_t num_chars; + + /* Show route information in a single line, for each address family */ + f = popen("ip --oneline -4 route show;ip --oneline -6 route show", "r"); + if (!f) { + /* Convert buffer into C-String. */ + memcpy(output, "", 1); + return; + } + while ((num_chars = getline(&line, &alloc_size, f)) > 0) { + /* Skip short lines. */ + if (num_chars <= strlen(needle)) + continue; + /* Skip lines without default route. */ + if (memcmp(line, needle, strlen(needle))) + continue; + /* Remove trailing newline to simplify further parsing. */ + if (line[num_chars - 1] == '\n') + line[num_chars - 1] = '\0'; + /* Search routes after match. */ + kvp_extract_routes(line + strlen(needle), &output, &remaining); + } + /* Convert buffer into C-String. */ + memcpy(output, "", 1); + free(line); + pclose(f); +} + static void kvp_get_ipconfig_info(char *if_name, struct hv_kvp_ipaddr_value *buffer) { @@ -685,30 +768,7 @@ static void kvp_get_ipconfig_info(char *if_name, char *p; FILE *file; - /* - * Get the address of default gateway (ipv4). - */ - sprintf(cmd, "%s %s", "ip route show dev", if_name); - strcat(cmd, " | awk '/default/ {print $3 }'"); - - /* - * Execute the command to gather gateway info. - */ - kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, - (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0); - - /* - * Get the address of default gateway (ipv6). - */ - sprintf(cmd, "%s %s", "ip -f inet6 route show dev", if_name); - strcat(cmd, " | awk '/default/ {print $3 }'"); - - /* - * Execute the command to gather gateway info (ipv6). - */ - kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, - (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1); - + kvp_get_gateway(buffer->gate_way, sizeof(buffer->gate_way)); /* * Gather the DNS state. From e53e004e346062e15df9511bd4b5a19e34701384 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Wed, 16 Apr 2025 12:26:16 +0200 Subject: [PATCH 135/559] accel/ivpu: Correct DCT interrupt handling Fix improper use of dct_active_percent field in DCT interrupt handler causing DCT to never get enabled. Set dct_active_percent internally before IPC to ensure correct driver value even if IPC fails. Set default DCT value to 30 accordingly to HW architecture specification. Fixes: a19bffb10c46 ("accel/ivpu: Implement DCT handling") Signed-off-by: Karol Wachowski Signed-off-by: Maciej Falkowski Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250416102616.384577-1-maciej.falkowski@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_btrs.h | 2 +- drivers/accel/ivpu/ivpu_pm.c | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h index 300f749971d4..d2d82651976d 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.h +++ b/drivers/accel/ivpu/ivpu_hw_btrs.h @@ -14,7 +14,7 @@ #define PLL_PROFILING_FREQ_DEFAULT 38400000 #define PLL_PROFILING_FREQ_HIGH 400000000 -#define DCT_DEFAULT_ACTIVE_PERCENT 15u +#define DCT_DEFAULT_ACTIVE_PERCENT 30u #define DCT_PERIOD_US 35300u int ivpu_hw_btrs_info_init(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index b5891e91f7ab..ac0e22454596 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -428,16 +428,17 @@ int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent) active_us = (DCT_PERIOD_US * active_percent) / 100; inactive_us = DCT_PERIOD_US - active_us; + vdev->pm->dct_active_percent = active_percent; + + ivpu_dbg(vdev, PM, "DCT requested %u%% (D0: %uus, D0i2: %uus)\n", + active_percent, active_us, inactive_us); + ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us); if (ret) { ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret); return ret; } - vdev->pm->dct_active_percent = active_percent; - - ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n", - active_percent, active_us, inactive_us); return 0; } @@ -445,15 +446,16 @@ int ivpu_pm_dct_disable(struct ivpu_device *vdev) { int ret; + vdev->pm->dct_active_percent = 0; + + ivpu_dbg(vdev, PM, "DCT requested to be disabled\n"); + ret = ivpu_jsm_dct_disable(vdev); if (ret) { ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret); return ret; } - vdev->pm->dct_active_percent = 0; - - ivpu_dbg(vdev, PM, "DCT disabled\n"); return 0; } @@ -466,7 +468,7 @@ void ivpu_pm_irq_dct_work_fn(struct work_struct *work) if (ivpu_hw_btrs_dct_get_request(vdev, &enable)) return; - if (vdev->pm->dct_active_percent) + if (enable) ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT); else ret = ivpu_pm_dct_disable(vdev); From 759ee400d1d95ac903d81a1a229a117be822d077 Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Wed, 16 Apr 2025 12:26:29 +0200 Subject: [PATCH 136/559] accel/ivpu: Fix the D0i2 disable test mode Correct setup of D0i2 disable which was by mistake set up to value 1 and use BIT(1) instead. Fixes: 011529fe8112 ("accel/ivpu: Implement D0i2 disable test mode") Signed-off-by: Andrzej Kacprowski Signed-off-by: Maciej Falkowski Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250416102629.384626-1-maciej.falkowski@linux.intel.com --- drivers/accel/ivpu/ivpu_fw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 5e1d709c6a46..ccaaf6c100c0 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -544,7 +544,7 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_ boot_params->d0i3_entry_vpu_ts); ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n", boot_params->system_time_us); - ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = %u\n", + ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = 0x%x\n", boot_params->power_profile); } @@ -646,7 +646,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->d0i3_residency_time_us = 0; boot_params->d0i3_entry_vpu_ts = 0; if (IVPU_WA(disable_d0i2)) - boot_params->power_profile = 1; + boot_params->power_profile |= BIT(1); boot_params->system_time_us = ktime_to_us(ktime_get_real()); wmb(); /* Flush WC buffers after writing bootparams */ From 71cfb1f88f772fb92a68a4ab85b16ccd5cc8535d Mon Sep 17 00:00:00 2001 From: Zixian Zeng Date: Fri, 25 Apr 2025 10:28:12 +0800 Subject: [PATCH 137/559] spi: dt-bindings: snps,dw-apb-ssi: Merge duplicate compatible entry Microsemi Ocelot/Jaguar2, Renesas RZ/N1 and T-HEAD TH1520 SoC-specific compatibles, which eventually fallback to the generic DW ssi compatible, it's better to combine them in single entry Suggested-by: Rob Herring Signed-off-by: Zixian Zeng Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250425-sfg-spi-v6-1-2dbe7bb46013@gmail.com Signed-off-by: Mark Brown --- .../bindings/spi/snps,dw-apb-ssi.yaml | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml index bccd00a1ddd0..a43d2fb9942d 100644 --- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml +++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml @@ -56,19 +56,17 @@ properties: enum: - snps,dw-apb-ssi - snps,dwc-ssi-1.01a - - description: Microsemi Ocelot/Jaguar2 SoC SPI Controller - items: - - enum: - - mscc,ocelot-spi - - mscc,jaguar2-spi - - const: snps,dw-apb-ssi - description: Microchip Sparx5 SoC SPI Controller const: microchip,sparx5-spi - description: Amazon Alpine SPI Controller const: amazon,alpine-dw-apb-ssi - - description: Renesas RZ/N1 SPI Controller + - description: Vendor controllers which use snps,dw-apb-ssi as fallback items: - - const: renesas,rzn1-spi + - enum: + - mscc,ocelot-spi + - mscc,jaguar2-spi + - renesas,rzn1-spi + - thead,th1520-spi - const: snps,dw-apb-ssi - description: Intel Keem Bay SPI Controller const: intel,keembay-ssi @@ -88,10 +86,6 @@ properties: - renesas,r9a06g032-spi # RZ/N1D - renesas,r9a06g033-spi # RZ/N1S - const: renesas,rzn1-spi # RZ/N1 - - description: T-HEAD TH1520 SoC SPI Controller - items: - - const: thead,th1520-spi - - const: snps,dw-apb-ssi reg: minItems: 1 From 0889c4d28ad79b55ee8cf3c818e9d86203ace8f0 Mon Sep 17 00:00:00 2001 From: Zixian Zeng Date: Fri, 25 Apr 2025 10:28:13 +0800 Subject: [PATCH 138/559] spi: dt-bindings: snps,dw-apb-ssi: Add compatible for SOPHGO SG2042 SoC Sophgo SG2042 ships an SPI controller [1] compatible with the Synopsys DW-SPI IP. Add SoC-specific compatible string and use the generic one as fallback. Link: https://github.com/sophgo/sophgo-doc/blob/main/SG2042/TRM/source/SPI.rst [1] Signed-off-by: Zixian Zeng Acked-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250425-sfg-spi-v6-2-2dbe7bb46013@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml index a43d2fb9942d..53d00ca643b3 100644 --- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml +++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml @@ -66,6 +66,7 @@ properties: - mscc,ocelot-spi - mscc,jaguar2-spi - renesas,rzn1-spi + - sophgo,sg2042-spi - thead,th1520-spi - const: snps,dw-apb-ssi - description: Intel Keem Bay SPI Controller From 8e4d3d8a5e51e07bd0d6cdd81b5e4af79f796927 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 24 Apr 2025 17:43:33 +0530 Subject: [PATCH 139/559] spi: spi-mem: Add fix to avoid divide error For some SPI flash memory operations, dummy bytes are not mandatory. For example, in Winbond SPINAND flash memory devices, the `write_cache` and `update_cache` operation variants have zero dummy bytes. Calculating the duration for SPI memory operations with zero dummy bytes causes a divide error when `ncycles` is calculated in the spi_mem_calc_op_duration(). Add changes to skip the 'ncylcles' calculation for zero dummy bytes. Following divide error is fixed by this change: Oops: divide error: 0000 [#1] PREEMPT SMP NOPTI ... ? do_trap+0xdb/0x100 ? do_error_trap+0x75/0xb0 ? spi_mem_calc_op_duration+0x56/0xb0 ? exc_divide_error+0x3b/0x70 ? spi_mem_calc_op_duration+0x56/0xb0 ? asm_exc_divide_error+0x1b/0x20 ? spi_mem_calc_op_duration+0x56/0xb0 ? spinand_select_op_variant+0xee/0x190 [spinand] spinand_match_and_init+0x13e/0x1a0 [spinand] spinand_manufacturer_match+0x6e/0xa0 [spinand] spinand_probe+0x357/0x7f0 [spinand] ? kernfs_activate+0x87/0xd0 spi_mem_probe+0x7a/0xb0 spi_probe+0x7d/0x130 Fixes: 226d6cb3cb79 ("spi: spi-mem: Estimate the time taken by operations") Suggested-by: Krishnamoorthi M Co-developed-by: Akshata MukundShetty Signed-off-by: Akshata MukundShetty Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20250424121333.417372-1-Raju.Rangoju@amd.com Reviewed-by: Miquel Raynal Signed-off-by: Mark Brown --- drivers/spi/spi-mem.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index a31a1db07aa4..5db0639d3b01 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -596,7 +596,11 @@ u64 spi_mem_calc_op_duration(struct spi_mem_op *op) ns_per_cycles = 1000000000 / op->max_freq; ncycles += ((op->cmd.nbytes * 8) / op->cmd.buswidth) / (op->cmd.dtr ? 2 : 1); ncycles += ((op->addr.nbytes * 8) / op->addr.buswidth) / (op->addr.dtr ? 2 : 1); - ncycles += ((op->dummy.nbytes * 8) / op->dummy.buswidth) / (op->dummy.dtr ? 2 : 1); + + /* Dummy bytes are optional for some SPI flash memory operations */ + if (op->dummy.nbytes) + ncycles += ((op->dummy.nbytes * 8) / op->dummy.buswidth) / (op->dummy.dtr ? 2 : 1); + ncycles += ((op->data.nbytes * 8) / op->data.buswidth) / (op->data.dtr ? 2 : 1); return ncycles * ns_per_cycles; From ba85883d160515129b58873f74376a89faf21c7c Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 25 Apr 2025 11:31:39 +0530 Subject: [PATCH 140/559] ASoC: amd: acp: Fix NULL pointer deref on acp resume path update chip data using dev_get_drvdata(dev->parent) instead of dev_get_platdata(dev). BUG: kernel NULL pointer dereference, address: 0000000000000010 Call Trace: ? __pfx_platform_pm_resume+0x10/0x10 platform_pm_resume+0x28/0x60 dpm_run_callback+0x51/0x1a0 device_resume+0x1a6/0x2b0 dpm_resume+0x168/0x230 Fixes: e3933683b25e ("ASoC: amd: acp: Remove redundant acp_dev_data structure") Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250425060144.1773265-1-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-rembrandt.c | 2 +- sound/soc/amd/acp/acp-renoir.c | 2 +- sound/soc/amd/acp/acp63.c | 2 +- sound/soc/amd/acp/acp70.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/amd/acp/acp-rembrandt.c b/sound/soc/amd/acp/acp-rembrandt.c index 746b6ed72029..cccdd10c345e 100644 --- a/sound/soc/amd/acp/acp-rembrandt.c +++ b/sound/soc/amd/acp/acp-rembrandt.c @@ -199,7 +199,7 @@ static void rembrandt_audio_remove(struct platform_device *pdev) static int rmb_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; diff --git a/sound/soc/amd/acp/acp-renoir.c b/sound/soc/amd/acp/acp-renoir.c index ebf0106fc737..04f6d70b6a92 100644 --- a/sound/soc/amd/acp/acp-renoir.c +++ b/sound/soc/amd/acp/acp-renoir.c @@ -146,7 +146,7 @@ static void renoir_audio_remove(struct platform_device *pdev) static int rn_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; diff --git a/sound/soc/amd/acp/acp63.c b/sound/soc/amd/acp/acp63.c index 52d895e624c7..1f15c96a9b94 100644 --- a/sound/soc/amd/acp/acp63.c +++ b/sound/soc/amd/acp/acp63.c @@ -250,7 +250,7 @@ static void acp63_audio_remove(struct platform_device *pdev) static int acp63_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; diff --git a/sound/soc/amd/acp/acp70.c b/sound/soc/amd/acp/acp70.c index 6d5f5ade075c..217b717e9beb 100644 --- a/sound/soc/amd/acp/acp70.c +++ b/sound/soc/amd/acp/acp70.c @@ -182,7 +182,7 @@ static void acp_acp70_audio_remove(struct platform_device *pdev) static int acp70_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; From 6d9b64156d849e358cb49b6b899fb0b7d262bda8 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 25 Apr 2025 11:31:40 +0530 Subject: [PATCH 141/559] ASoC: amd: acp: Fix NULL pointer deref in acp_i2s_set_tdm_slot Update chip data using dev_get_drvdata(dev->parent) to fix NULL pointer deref in acp_i2s_set_tdm_slot. Fixes: cd60dec8994c ("ASoC: amd: acp: Refactor TDM slots selction based on acp revision id") Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250425060144.1773265-2-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c index a38409dd1d34..70fa54d568ef 100644 --- a/sound/soc/amd/acp/acp-i2s.c +++ b/sound/soc/amd/acp/acp-i2s.c @@ -97,7 +97,7 @@ static int acp_i2s_set_tdm_slot(struct snd_soc_dai *dai, u32 tx_mask, u32 rx_mas struct acp_stream *stream; int slot_len, no_of_slots; - chip = dev_get_platdata(dev); + chip = dev_get_drvdata(dev->parent); switch (slot_width) { case SLOT_WIDTH_8: slot_len = 8; From 138e6da0392ed067d0db7b5b5b4582c3668cfcf9 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 25 Apr 2025 11:31:41 +0530 Subject: [PATCH 142/559] ASoC: amd: acp: Fix devm_snd_soc_register_card(acp-pdm-mach) failure Add condition check to fix devm_snd_soc_register_card(acp-pdm-mach) deferred probe failure, when pdm DSD entry is not available. [15.910456] acp_mach acp-pdm-mach: devm_snd_soc_register_card(acp-pdm-mach) failed: -517 [15.910536] platform acp-pdm-mach: deferred probe pending: (reason unknown) Fixes: 6e60db74b69c2 ("ASoC: amd: acp: Refactor acp machine select") Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250425060144.1773265-3-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-legacy-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/amd/acp/acp-legacy-common.c b/sound/soc/amd/acp/acp-legacy-common.c index b4d68484e06d..ba8db0851daa 100644 --- a/sound/soc/amd/acp/acp-legacy-common.c +++ b/sound/soc/amd/acp/acp-legacy-common.c @@ -450,7 +450,7 @@ int acp_machine_select(struct acp_chip_info *chip) struct snd_soc_acpi_mach *mach; int size, platform; - if (chip->flag == FLAG_AMD_LEGACY_ONLY_DMIC) { + if (chip->flag == FLAG_AMD_LEGACY_ONLY_DMIC && chip->is_pdm_dev) { platform = chip->acp_rev; chip->mach_dev = platform_device_register_data(chip->dev, "acp-pdm-mach", PLATFORM_DEVID_NONE, &platform, From a549b927ea3f5e50b1394209b64e6e17e31d4db8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 20 Apr 2025 10:56:59 +0200 Subject: [PATCH 143/559] ASoC: Intel: bytcr_rt5640: Add DMI quirk for Acer Aspire SW3-013 Acer Aspire SW3-013 requires the very same quirk as other Acer Aspire model for making it working. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220011 Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250420085716.12095-1-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 6446cda0f857..0f3b8f44e701 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -576,6 +576,19 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF2 | BYT_RT5640_MCLK_EN), }, + { /* Acer Aspire SW3-013 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW3-013"), + }, + .driver_data = (void *)(BYT_RT5640_DMIC1_MAP | + BYT_RT5640_JD_SRC_JD2_IN4N | + BYT_RT5640_OVCD_TH_2000UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_DIFF_MIC | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Acer"), From 75aea4b0656ead0facd13d2aae4cb77326e53d2f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 24 Apr 2025 06:47:14 -0700 Subject: [PATCH 144/559] perf/x86/intel: Only check the group flag for X86 leader A warning in intel_pmu_lbr_counters_reorder() may be triggered by below perf command. perf record -e "{cpu-clock,cycles/call-graph="lbr"/}" -- sleep 1 It's because the group is mistakenly treated as a branch counter group. The hw.flags of the leader are used to determine whether a group is a branch counters group. However, the hw.flags is only available for a hardware event. The field to store the flags is a union type. For a software event, it's a hrtimer. The corresponding bit may be set if the leader is a software event. For a branch counter group and other groups that have a group flag (e.g., topdown, PEBS counters snapshotting, and ACR), the leader must be a X86 event. Check the X86 event before checking the flag. The patch only fixes the issue for the branch counter group. The following patch will fix the other groups. There may be an alternative way to fix the issue by moving the hw.flags out of the union type. It should work for now. But it's still possible that the flags will be used by other types of events later. As long as that type of event is used as a leader, a similar issue will be triggered. So the alternative way is dropped. Fixes: 33744916196b ("perf/x86/intel: Support branch counters logging") Closes: https://lore.kernel.org/lkml/20250412091423.1839809-1-luogengkun@huaweicloud.com/ Reported-by: Luo Gengkun Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250424134718.311934-2-kan.liang@linux.intel.com --- arch/x86/events/core.c | 2 +- arch/x86/events/perf_event.h | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 3a4f031d2f44..139ad80d1df3 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -754,7 +754,7 @@ void x86_pmu_enable_all(int added) } } -static inline int is_x86_event(struct perf_event *event) +int is_x86_event(struct perf_event *event) { int i; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 2c0ce0e9545e..4237c379cdc5 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -110,9 +110,16 @@ static inline bool is_topdown_event(struct perf_event *event) return is_metric_event(event) || is_slots_event(event); } +int is_x86_event(struct perf_event *event); + +static inline bool check_leader_group(struct perf_event *leader, int flags) +{ + return is_x86_event(leader) ? !!(leader->hw.flags & flags) : false; +} + static inline bool is_branch_counters_group(struct perf_event *event) { - return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS; + return check_leader_group(event->group_leader, PERF_X86_EVENT_BRANCH_COUNTERS); } static inline bool is_pebs_counter_event_group(struct perf_event *event) From e9988ad7b1744991118ac348a804f9395368a284 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 24 Apr 2025 06:47:15 -0700 Subject: [PATCH 145/559] perf/x86/intel: Check the X86 leader for pebs_counter_event_group The PEBS counters snapshotting group also requires a group flag in the leader. The leader must be a X86 event. Fixes: e02e9b0374c3 ("perf/x86/intel: Support PEBS counters snapshotting") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250424134718.311934-3-kan.liang@linux.intel.com --- arch/x86/events/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 4237c379cdc5..46d120597bab 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -124,7 +124,7 @@ static inline bool is_branch_counters_group(struct perf_event *event) static inline bool is_pebs_counter_event_group(struct perf_event *event) { - return event->group_leader->hw.flags & PERF_X86_EVENT_PEBS_CNTR; + return check_leader_group(event->group_leader, PERF_X86_EVENT_PEBS_CNTR); } struct amd_nb { From 7da9960b59fb7e590eb8538c9428db55a4ea2d23 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 24 Apr 2025 06:47:18 -0700 Subject: [PATCH 146/559] perf/x86/intel/ds: Fix counter backwards of non-precise events counters-snapshotting The counter backwards may be observed in the PMI handler when counters-snapshotting some non-precise events in the freq mode. For the non-precise events, it's possible the counters-snapshotting records a positive value for an overflowed PEBS event. Then the HW auto-reload mechanism reset the counter to 0 immediately. Because the pebs_event_reset is cleared in the freq mode, which doesn't set the PERF_X86_EVENT_AUTO_RELOAD. In the PMI handler, 0 will be read rather than the positive value recorded in the counters-snapshotting record. The counters-snapshotting case has to be specially handled. Since the event value has been updated when processing the counters-snapshotting record, only needs to set the new period for the counter via x86_pmu_set_period(). Fixes: e02e9b0374c3 ("perf/x86/intel: Support PEBS counters snapshotting") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250424134718.311934-6-kan.liang@linux.intel.com --- arch/x86/events/intel/ds.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 18c3ab579b8b..9b20acc0e932 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2379,8 +2379,25 @@ __intel_pmu_pebs_last_event(struct perf_event *event, */ intel_pmu_save_and_restart_reload(event, count); } - } else - intel_pmu_save_and_restart(event); + } else { + /* + * For a non-precise event, it's possible the + * counters-snapshotting records a positive value for the + * overflowed event. Then the HW auto-reload mechanism + * reset the counter to 0 immediately, because the + * pebs_event_reset is cleared if the PERF_X86_EVENT_AUTO_RELOAD + * is not set. The counter backwards may be observed in a + * PMI handler. + * + * Since the event value has been updated when processing the + * counters-snapshotting record, only needs to set the new + * period for the counter. + */ + if (is_pebs_counter_event_group(event)) + static_call(x86_pmu_set_period)(event); + else + intel_pmu_save_and_restart(event); + } } static __always_inline void From 98698ca0e58734bc5c1c24e5bbc7429f981cd186 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Wed, 23 Apr 2025 11:47:15 +0100 Subject: [PATCH 147/559] staging: bcm2835-camera: Initialise dev in v4l2_dev Commit 42a2f6664e18 ("staging: vc04_services: Move global g_state to vchiq_state") changed mmal_init to pass dev->v4l2_dev.dev to vchiq_mmal_init, however nothing iniitialised dev->v4l2_dev, so we got a NULL pointer dereference. Set dev->v4l2_dev.dev during bcm2835_mmal_probe. The device pointer could be passed into v4l2_device_register to set it, however that also has other effects that would need additional changes. Fixes: 42a2f6664e18 ("staging: vc04_services: Move global g_state to vchiq_state") Cc: stable@vger.kernel.org Signed-off-by: Dave Stevenson Reviewed-by: Stefan Wahren Link: https://lore.kernel.org/r/20250423-staging-bcm2835-v4l2-fix-v2-1-3227f0ba4700@raspberrypi.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c index b839b50ac26a..fa7ea4ca4c36 100644 --- a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c +++ b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c @@ -1900,6 +1900,7 @@ static int bcm2835_mmal_probe(struct vchiq_device *device) __func__, ret); goto free_dev; } + dev->v4l2_dev.dev = &device->dev; /* setup v4l controls */ ret = bcm2835_mmal_init_controls(dev, &dev->ctrl_handler); From 2ca34b508774aaa590fc3698a54204706ecca4ba Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Fri, 18 Apr 2025 21:29:37 -0400 Subject: [PATCH 148/559] staging: axis-fifo: Correct handling of tx_fifo_depth for size validation Remove erroneous subtraction of 4 from the total FIFO depth read from device tree. The stored depth is for checking against total capacity, not initial vacancy. This prevented writes near the FIFO's full size. The check performed just before data transfer, which uses live reads of the TDFV register to determine current vacancy, correctly handles the initial Depth - 4 hardware state and subsequent FIFO fullness. Fixes: 4a965c5f89de ("staging: add driver for Xilinx AXI-Stream FIFO v4.1 IP core") Cc: stable@vger.kernel.org Signed-off-by: Gabriel Shahrouzi Link: https://lore.kernel.org/r/20250419012937.674924-1-gshahrouzi@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/axis-fifo/axis-fifo.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/staging/axis-fifo/axis-fifo.c b/drivers/staging/axis-fifo/axis-fifo.c index 7540c20090c7..04b3dc3cfe7c 100644 --- a/drivers/staging/axis-fifo/axis-fifo.c +++ b/drivers/staging/axis-fifo/axis-fifo.c @@ -775,9 +775,6 @@ static int axis_fifo_parse_dt(struct axis_fifo *fifo) goto end; } - /* IP sets TDFV to fifo depth - 4 so we will do the same */ - fifo->tx_fifo_depth -= 4; - ret = get_dts_property(fifo, "xlnx,use-rx-data", &fifo->has_rx_fifo); if (ret) { dev_err(fifo->dt_device, "missing xlnx,use-rx-data property\n"); From c6e8d85fafa7193613db37da29c0e8d6e2515b13 Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Fri, 18 Apr 2025 20:43:06 -0400 Subject: [PATCH 149/559] staging: axis-fifo: Remove hardware resets for user errors The axis-fifo driver performs a full hardware reset (via reset_ip_core()) in several error paths within the read and write functions. This reset flushes both TX and RX FIFOs and resets the AXI-Stream links. Allow the user to handle the error without causing hardware disruption or data loss in other FIFO paths. Fixes: 4a965c5f89de ("staging: add driver for Xilinx AXI-Stream FIFO v4.1 IP core") Cc: stable@vger.kernel.org Signed-off-by: Gabriel Shahrouzi Link: https://lore.kernel.org/r/20250419004306.669605-1-gshahrouzi@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/axis-fifo/axis-fifo.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/staging/axis-fifo/axis-fifo.c b/drivers/staging/axis-fifo/axis-fifo.c index 04b3dc3cfe7c..351f983ef914 100644 --- a/drivers/staging/axis-fifo/axis-fifo.c +++ b/drivers/staging/axis-fifo/axis-fifo.c @@ -393,16 +393,14 @@ static ssize_t axis_fifo_read(struct file *f, char __user *buf, bytes_available = ioread32(fifo->base_addr + XLLF_RLR_OFFSET); if (!bytes_available) { - dev_err(fifo->dt_device, "received a packet of length 0 - fifo core will be reset\n"); - reset_ip_core(fifo); + dev_err(fifo->dt_device, "received a packet of length 0\n"); ret = -EIO; goto end_unlock; } if (bytes_available > len) { - dev_err(fifo->dt_device, "user read buffer too small (available bytes=%zu user buffer bytes=%zu) - fifo core will be reset\n", + dev_err(fifo->dt_device, "user read buffer too small (available bytes=%zu user buffer bytes=%zu)\n", bytes_available, len); - reset_ip_core(fifo); ret = -EINVAL; goto end_unlock; } @@ -411,8 +409,7 @@ static ssize_t axis_fifo_read(struct file *f, char __user *buf, /* this probably can't happen unless IP * registers were previously mishandled */ - dev_err(fifo->dt_device, "received a packet that isn't word-aligned - fifo core will be reset\n"); - reset_ip_core(fifo); + dev_err(fifo->dt_device, "received a packet that isn't word-aligned\n"); ret = -EIO; goto end_unlock; } @@ -433,7 +430,6 @@ static ssize_t axis_fifo_read(struct file *f, char __user *buf, if (copy_to_user(buf + copied * sizeof(u32), tmp_buf, copy * sizeof(u32))) { - reset_ip_core(fifo); ret = -EFAULT; goto end_unlock; } @@ -542,7 +538,6 @@ static ssize_t axis_fifo_write(struct file *f, const char __user *buf, if (copy_from_user(tmp_buf, buf + copied * sizeof(u32), copy * sizeof(u32))) { - reset_ip_core(fifo); ret = -EFAULT; goto end_unlock; } From 3d59224947b024c9b2aa6e149a1537d449adb828 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 24 Apr 2025 21:50:14 +0530 Subject: [PATCH 150/559] cpufreq: ACPI: Re-sync CPU boost state on system resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During CPU hotunplug events (such as those occurring during suspend/resume cycles), platform firmware may modify the CPU boost state. If boost was disabled prior to CPU removal, it correctly remains disabled upon re-plug. However, if firmware re-enables boost while the CPU is offline, the CPU may return with boost enabled—even if it was originally disabled—once it is hotplugged back in. This leads to inconsistent behavior and violates user or kernel policy expectations. To maintain consistency, ensure the boost state is re-synchronized with the kernel policy when a CPU is hotplugged back in. Note: This re-synchronization is not necessary during the initial call to ->init() for a CPU, as the cpufreq core handles it via cpufreq_online(). At that point, acpi_cpufreq_driver.boost_enabled is initialized to the value returned by boost_state(0). Fixes: 2b16c631832d ("cpufreq: ACPI: Remove set_boost in acpi_cpufreq_cpu_init()") Reported-by: Nicholas Chin Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220013 Tested-by: Nicholas Chin Reviewed-by: Lifeng Zheng Signed-off-by: Viresh Kumar Link: https://patch.msgid.link/9c7de55fb06015c1b77e7dafd564b659838864e0.1745511526.git.viresh.kumar@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 924314cdeebc..d26b610e4f24 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -909,8 +909,19 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); - if (acpi_cpufreq_driver.set_boost) - policy->boost_supported = true; + if (acpi_cpufreq_driver.set_boost) { + if (policy->boost_supported) { + /* + * The firmware may have altered boost state while the + * CPU was offline (for example during a suspend-resume + * cycle). + */ + if (policy->boost_enabled != boost_state(cpu)) + set_boost(policy, policy->boost_enabled); + } else { + policy->boost_supported = true; + } + } return result; From 6d0417e4e1cf66fd917f06f0454958362714ef7d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 9 Apr 2025 16:08:48 -0400 Subject: [PATCH 151/559] Bluetooth: hci_conn: Fix not setting conn_timeout for Broadcast Receiver Broadcast Receiver requires creating PA sync but the command just generates a status so this makes use of __hci_cmd_sync_status_sk to wait for HCI_EV_LE_PA_SYNC_ESTABLISHED, also because of this chance it is not longer necessary to use a custom method to serialize the process of creating the PA sync since the cmd_work_sync itself ensures only one command would be pending which now awaits for HCI_EV_LE_PA_SYNC_ESTABLISHED before proceeding to next connection. Fixes: 4a5e0ba68676 ("Bluetooth: ISO: Do not emit LE PA Create Sync if previous is pending") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 2 + include/net/bluetooth/hci_core.h | 13 +++-- include/net/bluetooth/hci_sync.h | 2 + net/bluetooth/hci_conn.c | 92 +------------------------------- net/bluetooth/hci_event.c | 6 +-- net/bluetooth/hci_sync.c | 87 ++++++++++++++++++++++++++++-- 6 files changed, 95 insertions(+), 107 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index a8586c3058c7..8ea7a063cc65 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1931,6 +1931,8 @@ struct hci_cp_le_pa_create_sync { __u8 sync_cte_type; } __packed; +#define HCI_OP_LE_PA_CREATE_SYNC_CANCEL 0x2045 + #define HCI_OP_LE_PA_TERM_SYNC 0x2046 struct hci_cp_le_pa_term_sync { __le16 handle; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 5115da34f881..f20368b9a5d2 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1113,10 +1113,8 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev, return NULL; } -static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev, - __u8 sid, - bdaddr_t *dst, - __u8 dst_type) +static inline struct hci_conn * +hci_conn_hash_lookup_create_pa_sync(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; @@ -1124,8 +1122,10 @@ static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || bacmp(&c->dst, dst) || - c->dst_type != dst_type || c->sid != sid) + if (c->type != ISO_LINK) + continue; + + if (!test_bit(HCI_CONN_CREATE_PA_SYNC, &c->flags)) continue; rcu_read_unlock(); @@ -1524,7 +1524,6 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); bool hci_iso_setup_path(struct hci_conn *conn); int hci_le_create_cis_pending(struct hci_dev *hdev); -int hci_pa_create_sync_pending(struct hci_dev *hdev); int hci_le_big_create_sync_pending(struct hci_dev *hdev); int hci_conn_check_create_cis(struct hci_conn *conn); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 7e2cf0cca939..93dac4c7f9e3 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -185,3 +185,5 @@ int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, struct hci_conn_params *params); + +int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7e1b53857648..c3112ce39f67 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2064,95 +2064,6 @@ static int create_big_sync(struct hci_dev *hdev, void *data) return hci_le_create_big(conn, &conn->iso_qos); } -static void create_pa_complete(struct hci_dev *hdev, void *data, int err) -{ - bt_dev_dbg(hdev, ""); - - if (err) - bt_dev_err(hdev, "Unable to create PA: %d", err); -} - -static bool hci_conn_check_create_pa_sync(struct hci_conn *conn) -{ - if (conn->type != ISO_LINK || conn->sid == HCI_SID_INVALID) - return false; - - return true; -} - -static int create_pa_sync(struct hci_dev *hdev, void *data) -{ - struct hci_cp_le_pa_create_sync cp = {0}; - struct hci_conn *conn; - int err = 0; - - hci_dev_lock(hdev); - - rcu_read_lock(); - - /* The spec allows only one pending LE Periodic Advertising Create - * Sync command at a time. If the command is pending now, don't do - * anything. We check for pending connections after each PA Sync - * Established event. - * - * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E - * page 2493: - * - * If the Host issues this command when another HCI_LE_Periodic_ - * Advertising_Create_Sync command is pending, the Controller shall - * return the error code Command Disallowed (0x0C). - */ - list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - if (test_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags)) - goto unlock; - } - - list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - if (hci_conn_check_create_pa_sync(conn)) { - struct bt_iso_qos *qos = &conn->iso_qos; - - cp.options = qos->bcast.options; - cp.sid = conn->sid; - cp.addr_type = conn->dst_type; - bacpy(&cp.addr, &conn->dst); - cp.skip = cpu_to_le16(qos->bcast.skip); - cp.sync_timeout = cpu_to_le16(qos->bcast.sync_timeout); - cp.sync_cte_type = qos->bcast.sync_cte_type; - - break; - } - } - -unlock: - rcu_read_unlock(); - - hci_dev_unlock(hdev); - - if (bacmp(&cp.addr, BDADDR_ANY)) { - hci_dev_set_flag(hdev, HCI_PA_SYNC); - set_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); - - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); - if (!err) - err = hci_update_passive_scan_sync(hdev); - - if (err) { - hci_dev_clear_flag(hdev, HCI_PA_SYNC); - clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); - } - } - - return err; -} - -int hci_pa_create_sync_pending(struct hci_dev *hdev) -{ - /* Queue start pa_create_sync and scan */ - return hci_cmd_sync_queue(hdev, create_pa_sync, - NULL, create_pa_complete); -} - struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos) @@ -2167,10 +2078,11 @@ struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, conn->dst_type = dst_type; conn->sid = sid; conn->state = BT_LISTEN; + conn->conn_timeout = msecs_to_jiffies(qos->bcast.sync_timeout * 10); hci_conn_hold(conn); - hci_pa_create_sync_pending(hdev); + hci_connect_pa_sync(hdev, conn); return conn; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 5f808f0b0e9a..ea7ccafd055a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6378,8 +6378,7 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, hci_dev_clear_flag(hdev, HCI_PA_SYNC); - conn = hci_conn_hash_lookup_sid(hdev, ev->sid, &ev->bdaddr, - ev->bdaddr_type); + conn = hci_conn_hash_lookup_create_pa_sync(hdev); if (!conn) { bt_dev_err(hdev, "Unable to find connection for dst %pMR sid 0x%2.2x", @@ -6418,9 +6417,6 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, } unlock: - /* Handle any other pending PA sync command */ - hci_pa_create_sync_pending(hdev); - hci_dev_unlock(hdev); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 609b035e5c90..99c116b056ce 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2693,16 +2693,16 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev) /* Force address filtering if PA Sync is in progress */ if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) { - struct hci_cp_le_pa_create_sync *sent; + struct hci_conn *conn; - sent = hci_sent_cmd_data(hdev, HCI_OP_LE_PA_CREATE_SYNC); - if (sent) { + conn = hci_conn_hash_lookup_create_pa_sync(hdev); + if (conn) { struct conn_params pa; memset(&pa, 0, sizeof(pa)); - bacpy(&pa.addr, &sent->addr); - pa.addr_type = sent->addr_type; + bacpy(&pa.addr, &conn->dst); + pa.addr_type = conn->dst_type; /* Clear first since there could be addresses left * behind. @@ -6895,3 +6895,80 @@ int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, return __hci_cmd_sync_status(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } + +static void create_pa_complete(struct hci_dev *hdev, void *data, int err) +{ + bt_dev_dbg(hdev, "err %d", err); + + if (!err) + return; + + hci_dev_clear_flag(hdev, HCI_PA_SYNC); + + if (err == -ECANCELED) + return; + + hci_dev_lock(hdev); + + hci_update_passive_scan_sync(hdev); + + hci_dev_unlock(hdev); +} + +static int hci_le_pa_create_sync(struct hci_dev *hdev, void *data) +{ + struct hci_cp_le_pa_create_sync cp; + struct hci_conn *conn = data; + struct bt_iso_qos *qos = &conn->iso_qos; + int err; + + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; + + if (hci_dev_test_and_set_flag(hdev, HCI_PA_SYNC)) + return -EBUSY; + + /* Mark HCI_CONN_CREATE_PA_SYNC so hci_update_passive_scan_sync can + * program the address in the allow list so PA advertisements can be + * received. + */ + set_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); + + hci_update_passive_scan_sync(hdev); + + memset(&cp, 0, sizeof(cp)); + cp.options = qos->bcast.options; + cp.sid = conn->sid; + cp.addr_type = conn->dst_type; + bacpy(&cp.addr, &conn->dst); + cp.skip = cpu_to_le16(qos->bcast.skip); + cp.sync_timeout = cpu_to_le16(qos->bcast.sync_timeout); + cp.sync_cte_type = qos->bcast.sync_cte_type; + + /* The spec allows only one pending LE Periodic Advertising Create + * Sync command at a time so we forcefully wait for PA Sync Established + * event since cmd_work can only schedule one command at a time. + * + * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E + * page 2493: + * + * If the Host issues this command when another HCI_LE_Periodic_ + * Advertising_Create_Sync command is pending, the Controller shall + * return the error code Command Disallowed (0x0C). + */ + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_PA_CREATE_SYNC, + sizeof(cp), &cp, + HCI_EV_LE_PA_SYNC_ESTABLISHED, + conn->conn_timeout, NULL); + if (err == -ETIMEDOUT) + __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC_CANCEL, + 0, NULL, HCI_CMD_TIMEOUT); + + return err; +} + +int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn) +{ + return hci_cmd_sync_queue_once(hdev, hci_le_pa_create_sync, conn, + create_pa_complete); +} From 024421cf39923927ab2b5fe895d1d922b9abe67f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 16 Apr 2025 15:43:32 -0400 Subject: [PATCH 152/559] Bluetooth: hci_conn: Fix not setting timeout for BIG Create Sync BIG Create Sync requires the command to just generates a status so this makes use of __hci_cmd_sync_status_sk to wait for HCI_EVT_LE_BIG_SYNC_ESTABLISHED, also because of this chance it is not longer necessary to use a custom method to serialize the process of creating the BIG sync since the cmd_work_sync itself ensures only one command would be pending which now awaits for HCI_EVT_LE_BIG_SYNC_ESTABLISHED before proceeding to next connection. Fixes: 42ecf1947135 ("Bluetooth: ISO: Do not emit LE BIG Create Sync if previous is pending") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 2 +- include/net/bluetooth/hci_core.h | 7 ++- include/net/bluetooth/hci_sync.h | 1 + net/bluetooth/hci_conn.c | 89 ++------------------------------ net/bluetooth/hci_event.c | 9 ++-- net/bluetooth/hci_sync.c | 63 ++++++++++++++++++++++ net/bluetooth/iso.c | 26 +++++----- 7 files changed, 88 insertions(+), 109 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 8ea7a063cc65..797992019f9e 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2832,7 +2832,7 @@ struct hci_evt_le_create_big_complete { __le16 bis_handle[]; } __packed; -#define HCI_EVT_LE_BIG_SYNC_ESTABILISHED 0x1d +#define HCI_EVT_LE_BIG_SYNC_ESTABLISHED 0x1d struct hci_evt_le_big_sync_estabilished { __u8 status; __u8 handle; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f20368b9a5d2..522d837a23fa 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1524,7 +1524,6 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); bool hci_iso_setup_path(struct hci_conn *conn); int hci_le_create_cis_pending(struct hci_dev *hdev); -int hci_le_big_create_sync_pending(struct hci_dev *hdev); int hci_conn_check_create_cis(struct hci_conn *conn); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, @@ -1565,9 +1564,9 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 data_len, __u8 *data); struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos); -int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, - struct bt_iso_qos *qos, - __u16 sync_handle, __u8 num_bis, __u8 bis[]); +int hci_conn_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, + struct bt_iso_qos *qos, __u16 sync_handle, + __u8 num_bis, __u8 bis[]); int hci_conn_check_link_mode(struct hci_conn *conn); int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level); int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 93dac4c7f9e3..72558c826aa1 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -187,3 +187,4 @@ int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, struct hci_conn_params *params); int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn); +int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index c3112ce39f67..6533e281ada3 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2087,89 +2087,9 @@ struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, return conn; } -static bool hci_conn_check_create_big_sync(struct hci_conn *conn) -{ - if (!conn->num_bis) - return false; - - return true; -} - -static void big_create_sync_complete(struct hci_dev *hdev, void *data, int err) -{ - bt_dev_dbg(hdev, ""); - - if (err) - bt_dev_err(hdev, "Unable to create BIG sync: %d", err); -} - -static int big_create_sync(struct hci_dev *hdev, void *data) -{ - DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11); - struct hci_conn *conn; - - rcu_read_lock(); - - pdu->num_bis = 0; - - /* The spec allows only one pending LE BIG Create Sync command at - * a time. If the command is pending now, don't do anything. We - * check for pending connections after each BIG Sync Established - * event. - * - * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E - * page 2586: - * - * If the Host sends this command when the Controller is in the - * process of synchronizing to any BIG, i.e. the HCI_LE_BIG_Sync_ - * Established event has not been generated, the Controller shall - * return the error code Command Disallowed (0x0C). - */ - list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - if (test_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags)) - goto unlock; - } - - list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - if (hci_conn_check_create_big_sync(conn)) { - struct bt_iso_qos *qos = &conn->iso_qos; - - set_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); - - pdu->handle = qos->bcast.big; - pdu->sync_handle = cpu_to_le16(conn->sync_handle); - pdu->encryption = qos->bcast.encryption; - memcpy(pdu->bcode, qos->bcast.bcode, - sizeof(pdu->bcode)); - pdu->mse = qos->bcast.mse; - pdu->timeout = cpu_to_le16(qos->bcast.timeout); - pdu->num_bis = conn->num_bis; - memcpy(pdu->bis, conn->bis, conn->num_bis); - - break; - } - } - -unlock: - rcu_read_unlock(); - - if (!pdu->num_bis) - return 0; - - return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC, - struct_size(pdu, bis, pdu->num_bis), pdu); -} - -int hci_le_big_create_sync_pending(struct hci_dev *hdev) -{ - /* Queue big_create_sync */ - return hci_cmd_sync_queue_once(hdev, big_create_sync, - NULL, big_create_sync_complete); -} - -int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, - struct bt_iso_qos *qos, - __u16 sync_handle, __u8 num_bis, __u8 bis[]) +int hci_conn_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, + struct bt_iso_qos *qos, __u16 sync_handle, + __u8 num_bis, __u8 bis[]) { int err; @@ -2186,9 +2106,10 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, hcon->num_bis = num_bis; memcpy(hcon->bis, bis, num_bis); + hcon->conn_timeout = msecs_to_jiffies(qos->bcast.timeout * 10); } - return hci_le_big_create_sync_pending(hdev); + return hci_connect_big_sync(hdev, hcon); } static void create_big_complete(struct hci_dev *hdev, void *data, int err) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ea7ccafd055a..6d6061111ac5 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6928,7 +6928,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); - if (!hci_le_ev_skb_pull(hdev, skb, HCI_EVT_LE_BIG_SYNC_ESTABILISHED, + if (!hci_le_ev_skb_pull(hdev, skb, HCI_EVT_LE_BIG_SYNC_ESTABLISHED, flex_array_size(ev, bis, ev->num_bis))) return; @@ -6999,9 +6999,6 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, } unlock: - /* Handle any other pending BIG sync command */ - hci_le_big_create_sync_pending(hdev); - hci_dev_unlock(hdev); } @@ -7123,8 +7120,8 @@ static const struct hci_le_ev { hci_le_create_big_complete_evt, sizeof(struct hci_evt_le_create_big_complete), HCI_MAX_EVENT_SIZE), - /* [0x1d = HCI_EV_LE_BIG_SYNC_ESTABILISHED] */ - HCI_LE_EV_VL(HCI_EVT_LE_BIG_SYNC_ESTABILISHED, + /* [0x1d = HCI_EV_LE_BIG_SYNC_ESTABLISHED] */ + HCI_LE_EV_VL(HCI_EVT_LE_BIG_SYNC_ESTABLISHED, hci_le_big_sync_established_evt, sizeof(struct hci_evt_le_big_sync_estabilished), HCI_MAX_EVENT_SIZE), diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 99c116b056ce..e56b1cbedab9 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6972,3 +6972,66 @@ int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn) return hci_cmd_sync_queue_once(hdev, hci_le_pa_create_sync, conn, create_pa_complete); } + +static void create_big_complete(struct hci_dev *hdev, void *data, int err) +{ + struct hci_conn *conn = data; + + bt_dev_dbg(hdev, "err %d", err); + + if (err == -ECANCELED) + return; + + if (hci_conn_valid(hdev, conn)) + clear_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); +} + +static int hci_le_big_create_sync(struct hci_dev *hdev, void *data) +{ + DEFINE_FLEX(struct hci_cp_le_big_create_sync, cp, bis, num_bis, 0x11); + struct hci_conn *conn = data; + struct bt_iso_qos *qos = &conn->iso_qos; + int err; + + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; + + set_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); + + memset(cp, 0, sizeof(*cp)); + cp->handle = qos->bcast.big; + cp->sync_handle = cpu_to_le16(conn->sync_handle); + cp->encryption = qos->bcast.encryption; + memcpy(cp->bcode, qos->bcast.bcode, sizeof(cp->bcode)); + cp->mse = qos->bcast.mse; + cp->timeout = cpu_to_le16(qos->bcast.timeout); + cp->num_bis = conn->num_bis; + memcpy(cp->bis, conn->bis, conn->num_bis); + + /* The spec allows only one pending LE BIG Create Sync command at + * a time, so we forcefully wait for BIG Sync Established event since + * cmd_work can only schedule one command at a time. + * + * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E + * page 2586: + * + * If the Host sends this command when the Controller is in the + * process of synchronizing to any BIG, i.e. the HCI_LE_BIG_Sync_ + * Established event has not been generated, the Controller shall + * return the error code Command Disallowed (0x0C). + */ + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_BIG_CREATE_SYNC, + struct_size(cp, bis, cp->num_bis), cp, + HCI_EVT_LE_BIG_SYNC_ESTABLISHED, + conn->conn_timeout, NULL); + if (err == -ETIMEDOUT) + hci_le_big_terminate_sync(hdev, cp->handle); + + return err; +} + +int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn) +{ + return hci_cmd_sync_queue_once(hdev, hci_le_big_create_sync, conn, + create_big_complete); +} diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 3501a991f1c6..2819cda616bc 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1462,14 +1462,13 @@ static void iso_conn_big_sync(struct sock *sk) lock_sock(sk); if (!test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) { - err = hci_le_big_create_sync(hdev, iso_pi(sk)->conn->hcon, - &iso_pi(sk)->qos, - iso_pi(sk)->sync_handle, - iso_pi(sk)->bc_num_bis, - iso_pi(sk)->bc_bis); + err = hci_conn_big_create_sync(hdev, iso_pi(sk)->conn->hcon, + &iso_pi(sk)->qos, + iso_pi(sk)->sync_handle, + iso_pi(sk)->bc_num_bis, + iso_pi(sk)->bc_bis); if (err) - bt_dev_err(hdev, "hci_le_big_create_sync: %d", - err); + bt_dev_err(hdev, "hci_big_create_sync: %d", err); } release_sock(sk); @@ -1922,7 +1921,7 @@ static void iso_conn_ready(struct iso_conn *conn) hcon); } else if (test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags)) { ev = hci_recv_event_data(hcon->hdev, - HCI_EVT_LE_BIG_SYNC_ESTABILISHED); + HCI_EVT_LE_BIG_SYNC_ESTABLISHED); /* Get reference to PA sync parent socket, if it exists */ parent = iso_get_sock(&hcon->src, &hcon->dst, @@ -2113,12 +2112,11 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) && !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) { - err = hci_le_big_create_sync(hdev, - hcon, - &iso_pi(sk)->qos, - iso_pi(sk)->sync_handle, - iso_pi(sk)->bc_num_bis, - iso_pi(sk)->bc_bis); + err = hci_conn_big_create_sync(hdev, hcon, + &iso_pi(sk)->qos, + iso_pi(sk)->sync_handle, + iso_pi(sk)->bc_num_bis, + iso_pi(sk)->bc_bis); if (err) { bt_dev_err(hdev, "hci_le_big_create_sync: %d", err); From d1af1f02ef8653dea4573e444136c8331189cd59 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Thu, 17 Apr 2025 09:18:42 +0530 Subject: [PATCH 153/559] Bluetooth: btintel_pcie: Avoid redundant buffer allocation Reuse the skb buffer provided by the PCIe driver to pass it onto the stack, instead of copying it to a new skb. Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Kiran K Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 33 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index c1e69fcc9c4f..efe6ad6d4dc0 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -957,8 +957,10 @@ static int btintel_pcie_recv_event(struct hci_dev *hdev, struct sk_buff *skb) /* This is a debug event that comes from IML and OP image when it * starts execution. There is no need pass this event to stack. */ - if (skb->data[2] == 0x97) + if (skb->data[2] == 0x97) { + hci_recv_diag(hdev, skb); return 0; + } } return hci_recv_frame(hdev, skb); @@ -974,7 +976,6 @@ static int btintel_pcie_recv_frame(struct btintel_pcie_data *data, u8 pkt_type; u16 plen; u32 pcie_pkt_type; - struct sk_buff *new_skb; void *pdata; struct hci_dev *hdev = data->hdev; @@ -1051,24 +1052,20 @@ static int btintel_pcie_recv_frame(struct btintel_pcie_data *data, bt_dev_dbg(hdev, "pkt_type: 0x%2.2x len: %u", pkt_type, plen); - new_skb = bt_skb_alloc(plen, GFP_ATOMIC); - if (!new_skb) { - bt_dev_err(hdev, "Failed to allocate memory for skb of len: %u", - skb->len); - ret = -ENOMEM; - goto exit_error; - } - - hci_skb_pkt_type(new_skb) = pkt_type; - skb_put_data(new_skb, skb->data, plen); + hci_skb_pkt_type(skb) = pkt_type; hdev->stat.byte_rx += plen; + skb_trim(skb, plen); if (pcie_pkt_type == BTINTEL_PCIE_HCI_EVT_PKT) - ret = btintel_pcie_recv_event(hdev, new_skb); + ret = btintel_pcie_recv_event(hdev, skb); else - ret = hci_recv_frame(hdev, new_skb); + ret = hci_recv_frame(hdev, skb); + skb = NULL; /* skb is freed in the callee */ exit_error: + if (skb) + kfree_skb(skb); + if (ret) hdev->stat.err_rx++; @@ -1202,8 +1199,6 @@ static void btintel_pcie_rx_work(struct work_struct *work) struct btintel_pcie_data *data = container_of(work, struct btintel_pcie_data, rx_work); struct sk_buff *skb; - int err; - struct hci_dev *hdev = data->hdev; if (test_bit(BTINTEL_PCIE_HWEXP_INPROGRESS, &data->flags)) { /* Unlike usb products, controller will not send hardware @@ -1224,11 +1219,7 @@ static void btintel_pcie_rx_work(struct work_struct *work) /* Process the sk_buf in queue and send to the HCI layer */ while ((skb = skb_dequeue(&data->rx_skb_q))) { - err = btintel_pcie_recv_frame(data, skb); - if (err) - bt_dev_err(hdev, "Failed to send received frame: %d", - err); - kfree_skb(skb); + btintel_pcie_recv_frame(data, skb); } } From 0317b033abcd1d8dd2798f0e2de5e84543d0bd22 Mon Sep 17 00:00:00 2001 From: En-Wei Wu Date: Mon, 21 Apr 2025 21:00:37 +0800 Subject: [PATCH 154/559] Bluetooth: btusb: avoid NULL pointer dereference in skb_dequeue() A NULL pointer dereference can occur in skb_dequeue() when processing a QCA firmware crash dump on WCN7851 (0489:e0f3). [ 93.672166] Bluetooth: hci0: ACL memdump size(589824) [ 93.672475] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 93.672517] Workqueue: hci0 hci_devcd_rx [bluetooth] [ 93.672598] RIP: 0010:skb_dequeue+0x50/0x80 The issue stems from handle_dump_pkt_qca() returning 0 even when a dump packet is successfully processed. This is because it incorrectly forwards the return value of hci_devcd_init() (which returns 0 on success). As a result, the caller (btusb_recv_acl_qca() or btusb_recv_evt_qca()) assumes the packet was not handled and passes it to hci_recv_frame(), leading to premature kfree() of the skb. Later, hci_devcd_rx() attempts to dequeue the same skb from the dump queue, resulting in a NULL pointer dereference. Fix this by: 1. Making handle_dump_pkt_qca() return 0 on success and negative errno on failure, consistent with kernel conventions. 2. Splitting dump packet detection into separate functions for ACL and event packets for better structure and readability. This ensures dump packets are properly identified and consumed, avoiding double handling and preventing NULL pointer access. Fixes: 20981ce2d5a5 ("Bluetooth: btusb: Add WCN6855 devcoredump support") Signed-off-by: En-Wei Wu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 101 +++++++++++++++++++++++++++----------- 1 file changed, 73 insertions(+), 28 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 5012b5ff92c8..a42dedb78e0a 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3010,22 +3010,16 @@ static void btusb_coredump_qca(struct hci_dev *hdev) bt_dev_err(hdev, "%s: triggle crash failed (%d)", __func__, err); } -/* - * ==0: not a dump pkt. - * < 0: fails to handle a dump pkt - * > 0: otherwise. - */ +/* Return: 0 on success, negative errno on failure. */ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) { - int ret = 1; + int ret = 0; u8 pkt_type; u8 *sk_ptr; unsigned int sk_len; u16 seqno; u32 dump_size; - struct hci_event_hdr *event_hdr; - struct hci_acl_hdr *acl_hdr; struct qca_dump_hdr *dump_hdr; struct btusb_data *btdata = hci_get_drvdata(hdev); struct usb_device *udev = btdata->udev; @@ -3035,30 +3029,14 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) sk_len = skb->len; if (pkt_type == HCI_ACLDATA_PKT) { - acl_hdr = hci_acl_hdr(skb); - if (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE) - return 0; sk_ptr += HCI_ACL_HDR_SIZE; sk_len -= HCI_ACL_HDR_SIZE; - event_hdr = (struct hci_event_hdr *)sk_ptr; - } else { - event_hdr = hci_event_hdr(skb); } - if ((event_hdr->evt != HCI_VENDOR_PKT) - || (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) - return 0; - sk_ptr += HCI_EVENT_HDR_SIZE; sk_len -= HCI_EVENT_HDR_SIZE; dump_hdr = (struct qca_dump_hdr *)sk_ptr; - if ((sk_len < offsetof(struct qca_dump_hdr, data)) - || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) - || (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) - return 0; - - /*it is dump pkt now*/ seqno = le16_to_cpu(dump_hdr->seqno); if (seqno == 0) { set_bit(BTUSB_HW_SSR_ACTIVE, &btdata->flags); @@ -3132,17 +3110,84 @@ out: return ret; } +/* Return: true if the ACL packet is a dump packet, false otherwise. */ +static bool acl_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb) +{ + u8 *sk_ptr; + unsigned int sk_len; + + struct hci_event_hdr *event_hdr; + struct hci_acl_hdr *acl_hdr; + struct qca_dump_hdr *dump_hdr; + + sk_ptr = skb->data; + sk_len = skb->len; + + acl_hdr = hci_acl_hdr(skb); + if (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE) + return false; + + sk_ptr += HCI_ACL_HDR_SIZE; + sk_len -= HCI_ACL_HDR_SIZE; + event_hdr = (struct hci_event_hdr *)sk_ptr; + + if ((event_hdr->evt != HCI_VENDOR_PKT) || + (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) + return false; + + sk_ptr += HCI_EVENT_HDR_SIZE; + sk_len -= HCI_EVENT_HDR_SIZE; + + dump_hdr = (struct qca_dump_hdr *)sk_ptr; + if ((sk_len < offsetof(struct qca_dump_hdr, data)) || + (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || + (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) + return false; + + return true; +} + +/* Return: true if the event packet is a dump packet, false otherwise. */ +static bool evt_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb) +{ + u8 *sk_ptr; + unsigned int sk_len; + + struct hci_event_hdr *event_hdr; + struct qca_dump_hdr *dump_hdr; + + sk_ptr = skb->data; + sk_len = skb->len; + + event_hdr = hci_event_hdr(skb); + + if ((event_hdr->evt != HCI_VENDOR_PKT) + || (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) + return false; + + sk_ptr += HCI_EVENT_HDR_SIZE; + sk_len -= HCI_EVENT_HDR_SIZE; + + dump_hdr = (struct qca_dump_hdr *)sk_ptr; + if ((sk_len < offsetof(struct qca_dump_hdr, data)) || + (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || + (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) + return false; + + return true; +} + static int btusb_recv_acl_qca(struct hci_dev *hdev, struct sk_buff *skb) { - if (handle_dump_pkt_qca(hdev, skb)) - return 0; + if (acl_pkt_is_dump_qca(hdev, skb)) + return handle_dump_pkt_qca(hdev, skb); return hci_recv_frame(hdev, skb); } static int btusb_recv_evt_qca(struct hci_dev *hdev, struct sk_buff *skb) { - if (handle_dump_pkt_qca(hdev, skb)) - return 0; + if (evt_pkt_is_dump_qca(hdev, skb)) + return handle_dump_pkt_qca(hdev, skb); return hci_recv_frame(hdev, skb); } From 07e90048e356a29079fbc011cfc2e1fa1d1c5ac9 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Tue, 22 Apr 2025 09:21:55 +0800 Subject: [PATCH 155/559] Bluetooth: btmtksdio: Check function enabled before doing close Check BTMTKSDIO_FUNC_ENABLED flag before doing close to prevent btmtksdio_close been called twice. Fixes: 6ac4233afb9a ("Bluetooth: btmtksdio: Prevent enabling interrupts after IRQ handler removal") Signed-off-by: Chris Lu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtksdio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index edd5eead1e93..e5a119ca7243 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -723,6 +723,10 @@ static int btmtksdio_close(struct hci_dev *hdev) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); + /* Skip btmtksdio_close if BTMTKSDIO_FUNC_ENABLED isn't set */ + if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) + return 0; + sdio_claim_host(bdev->func); /* Disable interrupt */ From 0b6d58bc6ea85e57de25c828444928e4a0aa79cb Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Tue, 22 Apr 2025 09:21:56 +0800 Subject: [PATCH 156/559] Bluetooth: btmtksdio: Do close if SDIO card removed without close To prevent Bluetooth SDIO card from be physically removed suddenly, driver needs to ensure btmtksdio_close is called before btmtksdio_remove to disable interrupts and txrx workqueue. Fixes: 6ac4233afb9a ("Bluetooth: btmtksdio: Prevent enabling interrupts after IRQ handler removal") Signed-off-by: Chris Lu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtksdio.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index e5a119ca7243..1d26207b2ba7 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -1447,11 +1447,15 @@ static void btmtksdio_remove(struct sdio_func *func) if (!bdev) return; + hdev = bdev->hdev; + + /* Make sure to call btmtksdio_close before removing sdio card */ + if (test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) + btmtksdio_close(hdev); + /* Be consistent the state in btmtksdio_probe */ pm_runtime_get_noresume(bdev->dev); - hdev = bdev->hdev; - sdio_set_drvdata(func, NULL); hci_unregister_dev(hdev); hci_free_dev(hdev); From 1c7664957e4edb234c69de2db4be1f740d2df564 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Sun, 20 Apr 2025 07:21:56 +0530 Subject: [PATCH 157/559] Bluetooth: btintel_pcie: Add additional to checks to clear TX/RX paths Due to a hardware issue, there is a possibility that the driver may miss an MSIx interrupt on the RX/TX data path. Since the TX and RX paths are independent, when a TX MSIx interrupt occurs, the driver can check the RX queue for any pending data and process it if present. The same approach applies to the RX path. Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Chandrashekar Devegowda Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index efe6ad6d4dc0..0a759ea26fd3 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1272,10 +1272,8 @@ static void btintel_pcie_msix_rx_handle(struct btintel_pcie_data *data) bt_dev_dbg(hdev, "RXQ: cr_hia: %u cr_tia: %u", cr_hia, cr_tia); /* Check CR_TIA and CR_HIA for change */ - if (cr_tia == cr_hia) { - bt_dev_warn(hdev, "RXQ: no new CD found"); + if (cr_tia == cr_hia) return; - } rxq = &data->rxq; @@ -1311,6 +1309,16 @@ static irqreturn_t btintel_pcie_msix_isr(int irq, void *data) return IRQ_WAKE_THREAD; } +static inline bool btintel_pcie_is_rxq_empty(struct btintel_pcie_data *data) +{ + return data->ia.cr_hia[BTINTEL_PCIE_RXQ_NUM] == data->ia.cr_tia[BTINTEL_PCIE_RXQ_NUM]; +} + +static inline bool btintel_pcie_is_txackq_empty(struct btintel_pcie_data *data) +{ + return data->ia.cr_tia[BTINTEL_PCIE_TXQ_NUM] == data->ia.cr_hia[BTINTEL_PCIE_TXQ_NUM]; +} + static irqreturn_t btintel_pcie_irq_msix_handler(int irq, void *dev_id) { struct msix_entry *entry = dev_id; @@ -1342,12 +1350,18 @@ static irqreturn_t btintel_pcie_irq_msix_handler(int irq, void *dev_id) btintel_pcie_msix_gp0_handler(data); /* For TX */ - if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_0) + if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_0) { btintel_pcie_msix_tx_handle(data); + if (!btintel_pcie_is_rxq_empty(data)) + btintel_pcie_msix_rx_handle(data); + } /* For RX */ - if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_1) + if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_1) { btintel_pcie_msix_rx_handle(data); + if (!btintel_pcie_is_txackq_empty(data)) + btintel_pcie_msix_tx_handle(data); + } /* * Before sending the interrupt the HW disables it to prevent a nested From 3908feb1bd7f319a10e18d84369a48163264cc7d Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Thu, 24 Apr 2025 22:51:03 +0300 Subject: [PATCH 158/559] Bluetooth: L2CAP: copy RX timestamp to new fragments Copy timestamp too when allocating new skb for received fragment. Fixes missing RX timestamps with fragmentation. Fixes: 4d7ea8ee90e4 ("Bluetooth: L2CAP: Fix handling fragmented length") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 5ca7ac43c58d..73472756618a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7415,6 +7415,9 @@ static int l2cap_recv_frag(struct l2cap_conn *conn, struct sk_buff *skb, return -ENOMEM; /* Init rx_len */ conn->rx_len = len; + + skb_set_delivery_time(conn->rx_skb, skb->tstamp, + skb->tstamp_type); } /* Copy as much as the rx_skb can hold */ From 14ae3003e73e777c9b36385a7c86f754b50a1821 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 21 Apr 2025 09:31:34 -0700 Subject: [PATCH 159/559] Drivers: hv: Fix bad ref to hv_synic_eventring_tail when CPU goes offline When a CPU goes offline, hv_common_cpu_die() frees the hv_synic_eventring_tail memory for the CPU. But in a normal VM (i.e., not running in the root partition) the per-CPU memory has not been allocated, resulting in a bad memory reference and oops when computing the argument to kfree(). Fix this by freeing the memory only when running in the root partition. Fixes: 04df7ac39943 ("Drivers: hv: Introduce per-cpu event ring tail") Signed-off-by: Michael Kelley Reviewed-by: Nuno Das Neves Link: https://lore.kernel.org/r/20250421163134.2024-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250421163134.2024-1-mhklinux@outlook.com> --- drivers/hv/hv_common.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index a7d7494feaca..59792e00cecf 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -566,9 +566,11 @@ int hv_common_cpu_die(unsigned int cpu) * originally allocated memory is reused in hv_common_cpu_init(). */ - synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail); - kfree(*synic_eventring_tail); - *synic_eventring_tail = NULL; + if (hv_root_partition()) { + synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail); + kfree(*synic_eventring_tail); + *synic_eventring_tail = NULL; + } return 0; } From e86e9134e1d1c90a960dd57f59ce574d27b9a124 Mon Sep 17 00:00:00 2001 From: Sean Heelan Date: Sat, 19 Apr 2025 19:59:28 +0100 Subject: [PATCH 160/559] ksmbd: fix use-after-free in kerberos authentication Setting sess->user = NULL was introduced to fix the dangling pointer created by ksmbd_free_user. However, it is possible another thread could be operating on the session and make use of sess->user after it has been passed to ksmbd_free_user but before sess->user is set to NULL. Cc: stable@vger.kernel.org Signed-off-by: Sean Heelan Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/auth.c | 14 +++++++++++++- fs/smb/server/smb2pdu.c | 5 ----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/auth.c b/fs/smb/server/auth.c index 83caa3849749..b3d121052408 100644 --- a/fs/smb/server/auth.c +++ b/fs/smb/server/auth.c @@ -550,7 +550,19 @@ int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob, retval = -ENOMEM; goto out; } - sess->user = user; + + if (!sess->user) { + /* First successful authentication */ + sess->user = user; + } else { + if (!ksmbd_compare_user(sess->user, user)) { + ksmbd_debug(AUTH, "different user tried to reuse session\n"); + retval = -EPERM; + ksmbd_free_user(user); + goto out; + } + ksmbd_free_user(user); + } memcpy(sess->sess_key, resp->payload, resp->session_key_len); memcpy(out_blob, resp->payload + resp->session_key_len, diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 372021b3f263..acc05657cfc7 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1607,11 +1607,6 @@ static int krb5_authenticate(struct ksmbd_work *work, if (prev_sess_id && prev_sess_id != sess->id) destroy_previous_session(conn, sess->user, prev_sess_id); - if (sess->state == SMB2_SESSION_VALID) { - ksmbd_free_user(sess->user); - sess->user = NULL; - } - retval = ksmbd_krb5_authenticate(sess, in_blob, in_len, out_blob, &out_len); if (retval) { From 2fc9feff45d92a92cd5f96487655d5be23fb7e2b Mon Sep 17 00:00:00 2001 From: Sean Heelan Date: Mon, 21 Apr 2025 15:39:29 +0000 Subject: [PATCH 161/559] ksmbd: fix use-after-free in session logoff The sess->user object can currently be in use by another thread, for example if another connection has sent a session setup request to bind to the session being free'd. The handler for that connection could be in the smb2_sess_setup function which makes use of sess->user. Cc: stable@vger.kernel.org Signed-off-by: Sean Heelan Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index acc05657cfc7..46aa08245742 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2249,10 +2249,6 @@ int smb2_session_logoff(struct ksmbd_work *work) sess->state = SMB2_SESSION_EXPIRED; up_write(&conn->session_lock); - if (sess->user) { - ksmbd_free_user(sess->user); - sess->user = NULL; - } ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_SETUP); rsp->StructureSize = cpu_to_le16(4); From 4c2227656d9003f4d77afc76f34dd81b95e4c2c4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 23 Apr 2025 15:36:00 +0200 Subject: [PATCH 162/559] vmxnet3: Fix malformed packet sizing in vmxnet3_process_xdp vmxnet3 driver's XDP handling is buggy for packet sizes using ring0 (that is, packet sizes between 128 - 3k bytes). We noticed MTU-related connectivity issues with Cilium's service load- balancing in case of vmxnet3 as NIC underneath. A simple curl to a HTTP backend service where the XDP LB was doing IPIP encap led to overly large packet sizes but only for *some* of the packets (e.g. HTTP GET request) while others (e.g. the prior TCP 3WHS) looked completely fine on the wire. In fact, the pcap recording on the backend node actually revealed that the node with the XDP LB was leaking uninitialized kernel data onto the wire for the affected packets, for example, while the packets should have been 152 bytes their actual size was 1482 bytes, so the remainder after 152 bytes was padded with whatever other data was in that page at the time (e.g. we saw user/payload data from prior processed packets). We only noticed this through an MTU issue, e.g. when the XDP LB node and the backend node both had the same MTU (e.g. 1500) then the curl request got dropped on the backend node's NIC given the packet was too large even though the IPIP-encapped packet normally would never even come close to the MTU limit. Lowering the MTU on the XDP LB (e.g. 1480) allowed to let the curl request succeed (which also indicates that the kernel ignored the padding, and thus the issue wasn't very user-visible). Commit e127ce7699c1 ("vmxnet3: Fix missing reserved tailroom") was too eager to also switch xdp_prepare_buff() from rcd->len to rbi->len. It really needs to stick to rcd->len which is the actual packet length from the descriptor. The latter we also feed into vmxnet3_process_xdp_small(), by the way, and it indicates the correct length needed to initialize the xdp->{data,data_end} parts. For e127ce7699c1 ("vmxnet3: Fix missing reserved tailroom") the relevant part was adapting xdp_init_buff() to address the warning given the xdp_data_hard_end() depends on xdp->frame_sz. With that fixed, traffic on the wire looks good again. Fixes: e127ce7699c1 ("vmxnet3: Fix missing reserved tailroom") Signed-off-by: Daniel Borkmann Tested-by: Andrew Sauber Cc: Anton Protopopov Cc: William Tu Cc: Martin Zaharinov Cc: Ronak Doshi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250423133600.176689-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- drivers/net/vmxnet3/vmxnet3_xdp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c index 616ecc38d172..5f470499e600 100644 --- a/drivers/net/vmxnet3/vmxnet3_xdp.c +++ b/drivers/net/vmxnet3/vmxnet3_xdp.c @@ -397,7 +397,7 @@ vmxnet3_process_xdp(struct vmxnet3_adapter *adapter, xdp_init_buff(&xdp, PAGE_SIZE, &rq->xdp_rxq); xdp_prepare_buff(&xdp, page_address(page), rq->page_pool->p.offset, - rbi->len, false); + rcd->len, false); xdp_buff_clear_frags_flag(&xdp); xdp_prog = rcu_dereference(rq->adapter->xdp_bpf_prog); From 5ec6d7d737a491256cd37e33910f7ac1978db591 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 25 Apr 2025 01:37:33 +0300 Subject: [PATCH 163/559] net: mscc: ocelot: delete PVID VLAN when readding it as non-PVID The following set of commands: ip link add br0 type bridge vlan_filtering 1 # vlan_default_pvid 1 is implicit ip link set swp0 master br0 bridge vlan add dev swp0 vid 1 should result in the dropping of untagged and 802.1p-tagged traffic, but we see that it continues to be accepted. Whereas, had we deleted VID 1 instead, the aforementioned dropping would have worked This is because the ANA_PORT_DROP_CFG update logic doesn't run, because ocelot_vlan_add() only calls ocelot_port_set_pvid() if the new VLAN has the BRIDGE_VLAN_INFO_PVID flag. Similar to other drivers like mt7530_port_vlan_add() which handle this case correctly, we need to test whether the VLAN we're changing used to have the BRIDGE_VLAN_INFO_PVID flag, but lost it now. That amounts to a PVID deletion and should be treated as such. Regarding blame attribution: this never worked properly since the introduction of bridge VLAN filtering in commit 7142529f1688 ("net: mscc: ocelot: add VLAN filtering"). However, there was a significant paradigm shift which aligned the ANA_PORT_DROP_CFG register with the PVID concept rather than with the native VLAN concept, and that change wasn't targeted for 'stable'. Realistically, that is as far as this fix needs to be propagated to. Fixes: be0576fed6d3 ("net: mscc: ocelot: move the logic to drop 802.1p traffic to the pvid deletion") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250424223734.3096202-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index ef93df520887..08bee56aea35 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -830,6 +830,7 @@ EXPORT_SYMBOL(ocelot_vlan_prepare); int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, bool untagged) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; int err; /* Ignore VID 0 added to our RX filter by the 8021q module, since @@ -849,6 +850,11 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, ocelot_bridge_vlan_find(ocelot, vid)); if (err) return err; + } else if (ocelot_port->pvid_vlan && + ocelot_bridge_vlan_find(ocelot, vid) == ocelot_port->pvid_vlan) { + err = ocelot_port_set_pvid(ocelot, port, NULL); + if (err) + return err; } /* Untagged egress vlan clasification */ From bf9de1dcd0eecd16020a677c900a70ea9b0a9714 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 25 Apr 2025 01:37:34 +0300 Subject: [PATCH 164/559] selftests: net: bridge_vlan_aware: test untagged/8021p-tagged with and without PVID Recent discussions around commit ad1afb003939 ("vlan_dev: VLAN 0 should be treated as "no vlan tag" (802.1p packet)") have sparked the question what happens with the DSA (and possibly other switchdev) data path when the bridge says that ports should have no PVID VLAN, but the 8021q module, as the result of a NETDEV_UP event, decides it should add VID 0 to the RX filter of those bridge ports. Do those bridge ports receive packets tagged with VID 0 or not, now? We don't know, there is no test. In the veth realm, this passes trivially, because veth is not VLAN filtering and this, the 8021q module lacks the instinct to add VID 0 in the first place. In the realm of VLAN filtering NICs with no switchdev offload, this should also pass, because the VLAN groups of the software bridge are consulted, where it can clearly be seen that a PVID is missing, even though the packet was initially accepted by the NIC. The test only poses a challenge for switchdev drivers, which usually have to program to hardware both VLANs from RX filtering, as well as from switchdev. Especially when a switchdev port joins a VLAN-aware bridge, it is unavoidable that it gains the NETIF_F_HW_VLAN_CTAG_FILTER feature, i.e. any 8021q uppers that the bridge port may have must also be committed to the RX filtering table of the interface. When a VLAN-tagged packet is physically received by the port, it is initially indistinguishable whether it will reach the bridge data path or the 8021q upper data path. That is rather the final step of the new tests that we introduce. We need to build context up to that stage, which means the following: - we need to test that 802.1p (VID 0) tagged traffic is received in the first place (on bridge ports with a valid PVID). This is the "8021p" test. - we need to test that the usual paths of reaching a configuration with no PVID on a bridge port are all covered and they all reach the same state. Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/20250424223734.3096202-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- .../net/forwarding/bridge_vlan_aware.sh | 96 ++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 90f8a244ea90..e59fba366a0a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid 8021p drop_untagged" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -194,6 +194,100 @@ other_tpid() tc qdisc del dev $h2 clsact } +8021p_do() +{ + local should_fail=$1; shift + local mac=de:ad:be:ef:13:37 + + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ -q $h1 -c 1 -b $mac -a own "81:00 00:00 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err_fail $should_fail $? "802.1p-tagged reception" + + tc filter del dev $h2 ingress pref 1 +} + +8021p() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with the default_pvid, 1, packets tagged with VID 0 are + # accepted. + 8021p_do 0 + + # Test that packets tagged with VID 0 are still accepted after changing + # the default_pvid. + ip link set br0 type bridge vlan_default_pvid 10 + 8021p_do 0 + + log_test "Reception of 802.1p-tagged traffic" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + +send_untagged_and_8021p() +{ + ping_do $h1 192.0.2.2 + check_fail $? + + 8021p_do 1 +} + +drop_untagged() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with no PVID, untagged and 802.1p-tagged traffic is + # dropped. + ip link set br0 type bridge vlan_default_pvid 1 + + # First we reconfigure the default_pvid, 1, as a non-PVID VLAN. + bridge vlan add dev $swp1 vid 1 untagged + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Next we try to delete VID 1 altogether + bridge vlan del dev $swp1 vid 1 + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Set up the bridge without a default_pvid, then check that the 8021q + # module, when the bridge port goes down and then up again, does not + # accidentally re-enable untagged packet reception. + ip link set br0 type bridge vlan_default_pvid 0 + ip link set $swp1 down + ip link set $swp1 up + setup_wait + send_untagged_and_8021p + + # Remove swp1 as a bridge port and let it rejoin the bridge while it + # has no default_pvid. + ip link set $swp1 nomaster + ip link set $swp1 master br0 + send_untagged_and_8021p + + # Restore settings + ip link set br0 type bridge vlan_default_pvid 1 + + log_test "Dropping of untagged and 802.1p-tagged traffic with no PVID" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + trap cleanup EXIT setup_prepare From 765f253e28909f161b0211f85cf0431cfee7d6df Mon Sep 17 00:00:00 2001 From: Christian Heusel Date: Thu, 24 Apr 2025 16:00:28 +0200 Subject: [PATCH 165/559] Revert "rndis_host: Flag RNDIS modems as WWAN devices" This reverts commit 67d1a8956d2d62fe6b4c13ebabb57806098511d8. Since this commit has been proven to be problematic for the setup of USB-tethered ethernet connections and the related breakage is very noticeable for users it should be reverted until a fixed version of the change can be rolled out. Closes: https://lore.kernel.org/all/e0df2d85-1296-4317-b717-bd757e3ab928@heusel.eu/ Link: https://chaos.social/@gromit/114377862699921553 Link: https://bugzilla.kernel.org/show_bug.cgi?id=220002 Link: https://bugs.gentoo.org/953555 Link: https://bbs.archlinux.org/viewtopic.php?id=304892 Cc: stable@vger.kernel.org Acked-by: Lubomir Rintel Signed-off-by: Christian Heusel Link: https://patch.msgid.link/20250424-usb-tethering-fix-v1-1-b65cf97c740e@heusel.eu Signed-off-by: Jakub Kicinski --- drivers/net/usb/rndis_host.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index bb0bf1415872..7b3739b29c8f 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -630,16 +630,6 @@ static const struct driver_info zte_rndis_info = { .tx_fixup = rndis_tx_fixup, }; -static const struct driver_info wwan_rndis_info = { - .description = "Mobile Broadband RNDIS device", - .flags = FLAG_WWAN | FLAG_POINTTOPOINT | FLAG_FRAMING_RN | FLAG_NO_SETINT, - .bind = rndis_bind, - .unbind = rndis_unbind, - .status = rndis_status, - .rx_fixup = rndis_rx_fixup, - .tx_fixup = rndis_tx_fixup, -}; - /*-------------------------------------------------------------------------*/ static const struct usb_device_id products [] = { @@ -676,11 +666,9 @@ static const struct usb_device_id products [] = { USB_INTERFACE_INFO(USB_CLASS_WIRELESS_CONTROLLER, 1, 3), .driver_info = (unsigned long) &rndis_info, }, { - /* Mobile Broadband Modem, seen in Novatel Verizon USB730L and - * Telit FN990A (RNDIS) - */ + /* Novatel Verizon USB730L */ USB_INTERFACE_INFO(USB_CLASS_MISC, 4, 1), - .driver_info = (unsigned long)&wwan_rndis_info, + .driver_info = (unsigned long) &rndis_info, }, { }, // END }; From 8548c84c004be3da4ffbe35ed0589041a4050c03 Mon Sep 17 00:00:00 2001 From: Sathesh B Edara Date: Thu, 24 Apr 2025 06:39:44 -0700 Subject: [PATCH 166/559] octeon_ep_vf: Resolve netdevice usage count issue The netdevice usage count increases during transmit queue timeouts because netdev_hold is called in ndo_tx_timeout, scheduling a task to reinitialize the card. Although netdev_put is called at the end of the scheduled work, rtnl_unlock checks the reference count during cleanup. This could cause issues if transmit timeout is called on multiple queues. Fixes: cb7dd712189f ("octeon_ep_vf: Add driver framework and device initialization") Signed-off-by: Sathesh B Edara Link: https://patch.msgid.link/20250424133944.28128-1-sedara@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c index 18c922dd5fc6..ccb69bc5c952 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c @@ -835,7 +835,9 @@ static void octep_vf_tx_timeout(struct net_device *netdev, unsigned int txqueue) struct octep_vf_device *oct = netdev_priv(netdev); netdev_hold(netdev, NULL, GFP_ATOMIC); - schedule_work(&oct->tx_timeout_task); + if (!schedule_work(&oct->tx_timeout_task)) + netdev_put(netdev, NULL); + } static int octep_vf_set_mac(struct net_device *netdev, void *p) From 8f7ae5a85137b913cb97e2d24409d36548d0bab1 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 24 Apr 2025 05:55:47 -0700 Subject: [PATCH 167/559] bnxt_en: improve TX timestamping FIFO configuration Reconfiguration of netdev may trigger close/open procedure which can break FIFO status by adjusting the amount of empty slots for TX timestamps. But it is not really needed because timestamps for the packets sent over the wire still can be retrieved. On the other side, during netdev close procedure any skbs waiting for TX timestamps can be leaked because there is no cleaning procedure called. Free skbs waiting for TX timestamps when closing netdev. Fixes: 8aa2a79e9b95 ("bnxt_en: Increase the max total outstanding PTP TX packets to 4") Reviewed-by: Michael Chan Reviewed-by: Pavan Chebbi Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20250424125547.460632-1-vadfed@meta.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 29 ++++++++++++++----- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 1 + 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c8e3468eee61..2c8e2c19d854 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3414,6 +3414,9 @@ static void bnxt_free_tx_skbs(struct bnxt *bp) bnxt_free_one_tx_ring_skbs(bp, txr, i); } + + if (bp->ptp_cfg && !(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) + bnxt_ptp_free_txts_skbs(bp->ptp_cfg); } static void bnxt_free_one_rx_ring(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) @@ -12797,8 +12800,6 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) /* VF-reps may need to be re-opened after the PF is re-opened */ if (BNXT_PF(bp)) bnxt_vf_reps_open(bp); - if (bp->ptp_cfg && !(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) - WRITE_ONCE(bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS); bnxt_ptp_init_rtc(bp, true); bnxt_ptp_cfg_tstamp_filters(bp); if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 2d4e19b96ee7..0669d43472f5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -794,6 +794,27 @@ next_slot: return HZ; } +void bnxt_ptp_free_txts_skbs(struct bnxt_ptp_cfg *ptp) +{ + struct bnxt_ptp_tx_req *txts_req; + u16 cons = ptp->txts_cons; + + /* make sure ptp aux worker finished with + * possible BNXT_STATE_OPEN set + */ + ptp_cancel_worker_sync(ptp->ptp_clock); + + ptp->tx_avail = BNXT_MAX_TX_TS; + while (cons != ptp->txts_prod) { + txts_req = &ptp->txts_req[cons]; + if (!IS_ERR_OR_NULL(txts_req->tx_skb)) + dev_kfree_skb_any(txts_req->tx_skb); + cons = NEXT_TXTS(cons); + } + ptp->txts_cons = cons; + ptp_schedule_worker(ptp->ptp_clock, 0); +} + int bnxt_ptp_get_txts_prod(struct bnxt_ptp_cfg *ptp, u16 *prod) { spin_lock_bh(&ptp->ptp_tx_lock); @@ -1105,7 +1126,6 @@ out: void bnxt_ptp_clear(struct bnxt *bp) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; - int i; if (!ptp) return; @@ -1117,12 +1137,5 @@ void bnxt_ptp_clear(struct bnxt *bp) kfree(ptp->ptp_info.pin_config); ptp->ptp_info.pin_config = NULL; - for (i = 0; i < BNXT_MAX_TX_TS; i++) { - if (ptp->txts_req[i].tx_skb) { - dev_kfree_skb_any(ptp->txts_req[i].tx_skb); - ptp->txts_req[i].tx_skb = NULL; - } - } - bnxt_unmap_ptp_regs(bp); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index a95f05e9c579..0481161d26ef 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -162,6 +162,7 @@ int bnxt_ptp_cfg_tstamp_filters(struct bnxt *bp); void bnxt_ptp_reapply_pps(struct bnxt *bp); int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr); int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); +void bnxt_ptp_free_txts_skbs(struct bnxt_ptp_cfg *ptp); int bnxt_ptp_get_txts_prod(struct bnxt_ptp_cfg *ptp, u16 *prod); void bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb, u16 prod); int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts); From 68f9d8974b545668e1be2422240b25a92e304b14 Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Thu, 24 Apr 2025 12:04:44 +0800 Subject: [PATCH 168/559] rtase: Modify the condition used to detect overflow in rtase_calc_time_mitigation Fix the following compile error reported by the kernel test robot by modifying the condition used to detect overflow in rtase_calc_time_mitigation. In file included from include/linux/mdio.h:10:0, from drivers/net/ethernet/realtek/rtase/rtase_main.c:58: In function 'u16_encode_bits', inlined from 'rtase_calc_time_mitigation.constprop' at drivers/net/ ethernet/realtek/rtase/rtase_main.c:1915:13, inlined from 'rtase_init_software_variable.isra.41' at drivers/net/ ethernet/realtek/rtase/rtase_main.c:1961:13, inlined from 'rtase_init_one' at drivers/net/ethernet/realtek/ rtase/rtase_main.c:2111:2: >> include/linux/bitfield.h:178:3: error: call to '__field_overflow' declared with attribute error: value doesn't fit into mask __field_overflow(); \ ^~~~~~~~~~~~~~~~~~ include/linux/bitfield.h:198:2: note: in expansion of macro '____MAKE_OP' ____MAKE_OP(u##size,u##size,,) ^~~~~~~~~~~ include/linux/bitfield.h:200:1: note: in expansion of macro '__MAKE_OP' __MAKE_OP(16) ^~~~~~~~~ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503182158.nkAlbJWX-lkp@intel.com/ Fixes: a36e9f5cfe9e ("rtase: Add support for a pci table in this module") Signed-off-by: Justin Lai Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250424040444.5530-1-justinlai0215@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/rtase/rtase_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index 2aacc1996796..55b8d3666153 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -1925,8 +1925,8 @@ static u16 rtase_calc_time_mitigation(u32 time_us) time_us = min_t(int, time_us, RTASE_MITI_MAX_TIME); - msb = fls(time_us); - if (msb >= RTASE_MITI_COUNT_BIT_NUM) { + if (time_us > RTASE_MITI_TIME_COUNT_MASK) { + msb = fls(time_us); time_unit = msb - RTASE_MITI_COUNT_BIT_NUM; time_count = time_us >> (msb - RTASE_MITI_COUNT_BIT_NUM); } else { From 6fe0866014486736cc3ba1c6fd4606d3dbe55c9c Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Thu, 24 Apr 2025 10:38:48 +0200 Subject: [PATCH 169/559] net: ethernet: mtk-star-emac: fix spinlock recursion issues on rx/tx poll Use spin_lock_irqsave and spin_unlock_irqrestore instead of spin_lock and spin_unlock in mtk_star_emac driver to avoid spinlock recursion occurrence that can happen when enabling the DMA interrupts again in rx/tx poll. ``` BUG: spinlock recursion on CPU#0, swapper/0/0 lock: 0xffff00000db9cf20, .magic: dead4ead, .owner: swapper/0/0, .owner_cpu: 0 CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.15.0-rc2-next-20250417-00001-gf6a27738686c-dirty #28 PREEMPT Hardware name: MediaTek MT8365 Open Platform EVK (DT) Call trace: show_stack+0x18/0x24 (C) dump_stack_lvl+0x60/0x80 dump_stack+0x18/0x24 spin_dump+0x78/0x88 do_raw_spin_lock+0x11c/0x120 _raw_spin_lock+0x20/0x2c mtk_star_handle_irq+0xc0/0x22c [mtk_star_emac] __handle_irq_event_percpu+0x48/0x140 handle_irq_event+0x4c/0xb0 handle_fasteoi_irq+0xa0/0x1bc handle_irq_desc+0x34/0x58 generic_handle_domain_irq+0x1c/0x28 gic_handle_irq+0x4c/0x120 do_interrupt_handler+0x50/0x84 el1_interrupt+0x34/0x68 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x6c/0x70 regmap_mmio_read32le+0xc/0x20 (P) _regmap_bus_reg_read+0x6c/0xac _regmap_read+0x60/0xdc regmap_read+0x4c/0x80 mtk_star_rx_poll+0x2f4/0x39c [mtk_star_emac] __napi_poll+0x38/0x188 net_rx_action+0x164/0x2c0 handle_softirqs+0x100/0x244 __do_softirq+0x14/0x20 ____do_softirq+0x10/0x20 call_on_irq_stack+0x24/0x64 do_softirq_own_stack+0x1c/0x40 __irq_exit_rcu+0xd4/0x10c irq_exit_rcu+0x10/0x1c el1_interrupt+0x38/0x68 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x6c/0x70 cpuidle_enter_state+0xac/0x320 (P) cpuidle_enter+0x38/0x50 do_idle+0x1e4/0x260 cpu_startup_entry+0x34/0x3c rest_init+0xdc/0xe0 console_on_rootfs+0x0/0x6c __primary_switched+0x88/0x90 ``` Fixes: 0a8bd81fd6aa ("net: ethernet: mtk-star-emac: separate tx/rx handling with two NAPIs") Signed-off-by: Louis-Alexis Eyraud Reviewed-by: Maxime Chevallier Acked-by: Bartosz Golaszewski Link: https://patch.msgid.link/20250424-mtk_star_emac-fix-spinlock-recursion-issue-v2-1-f3fde2e529d8@collabora.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_star_emac.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 76f202d7f055..23115881d8e8 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1163,6 +1163,7 @@ static int mtk_star_tx_poll(struct napi_struct *napi, int budget) struct net_device *ndev = priv->ndev; unsigned int head = ring->head; unsigned int entry = ring->tail; + unsigned long flags; while (entry != head && count < (MTK_STAR_RING_NUM_DESCS - 1)) { ret = mtk_star_tx_complete_one(priv); @@ -1182,9 +1183,9 @@ static int mtk_star_tx_poll(struct napi_struct *napi, int budget) netif_wake_queue(ndev); if (napi_complete(napi)) { - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); mtk_star_enable_dma_irq(priv, false, true); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } return 0; @@ -1341,6 +1342,7 @@ push_new_skb: static int mtk_star_rx_poll(struct napi_struct *napi, int budget) { struct mtk_star_priv *priv; + unsigned long flags; int work_done = 0; priv = container_of(napi, struct mtk_star_priv, rx_napi); @@ -1348,9 +1350,9 @@ static int mtk_star_rx_poll(struct napi_struct *napi, int budget) work_done = mtk_star_rx(priv, budget); if (work_done < budget) { napi_complete_done(napi, work_done); - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); mtk_star_enable_dma_irq(priv, true, false); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } return work_done; From e54b4db35e201a9173da9cb7abc8377e12abaf87 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Thu, 24 Apr 2025 10:38:49 +0200 Subject: [PATCH 170/559] net: ethernet: mtk-star-emac: rearm interrupts in rx_poll only when advised In mtk_star_rx_poll function, on event processing completion, the mtk_star_emac driver calls napi_complete_done but ignores its return code and enable RX DMA interrupts inconditionally. This return code gives the info if a device should avoid rearming its interrupts or not, so fix this behaviour by taking it into account. Fixes: 8c7bd5a454ff ("net: ethernet: mtk-star-emac: new driver") Signed-off-by: Louis-Alexis Eyraud Acked-by: Bartosz Golaszewski Link: https://patch.msgid.link/20250424-mtk_star_emac-fix-spinlock-recursion-issue-v2-2-f3fde2e529d8@collabora.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_star_emac.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 23115881d8e8..b175119a6a7d 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1348,8 +1348,7 @@ static int mtk_star_rx_poll(struct napi_struct *napi, int budget) priv = container_of(napi, struct mtk_star_priv, rx_napi); work_done = mtk_star_rx(priv, budget); - if (work_done < budget) { - napi_complete_done(napi, work_done); + if (work_done < budget && napi_complete_done(napi, work_done)) { spin_lock_irqsave(&priv->lock, flags); mtk_star_enable_dma_irq(priv, true, false); spin_unlock_irqrestore(&priv->lock, flags); From e8fa236e28811473db1594c597f974da0e9b753b Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Fri, 25 Apr 2025 18:36:18 +0800 Subject: [PATCH 171/559] ALSA: hda: Apply volume control on speaker+lineout for HP EliteStudio AIO This hardware has ALC274 codec with speaker NID 0x17 and line out NID 0x16 for audio output. The line out is routed correctly but the speaker is not. Thus the volume can't be controlled. This patch removes DAC NID 0x06 (without volume control) from the connection list for speaker NID 0x17. Routing both speaker and line out pins to DAC NID 0x02 which controls the output volume. Signed-off-by: Chris Chiu Link: https://patch.msgid.link/20250425103618.534951-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8ed613932c5b..38ed264c3a49 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6742,6 +6742,25 @@ static void alc274_fixup_bind_dacs(struct hda_codec *codec, codec->power_save_node = 0; } +/* avoid DAC 0x06 for speaker switch 0x17; it has no volume control */ +static void alc274_fixup_hp_aio_bind_dacs(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static const hda_nid_t conn[] = { 0x02, 0x03 }; /* exclude 0x06 */ + /* The speaker is routed to the Node 0x06 by a mistake, thus the + * speaker's volume can't be adjusted since the node doesn't have + * Amp-out capability. Assure the speaker and lineout pin to be + * coupled with DAC NID 0x02. + */ + static const hda_nid_t preferred_pairs[] = { + 0x16, 0x02, 0x17, 0x02, 0x21, 0x03, 0 + }; + struct alc_spec *spec = codec->spec; + + snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); + spec->gen.preferred_dacs = preferred_pairs; +} + /* avoid DAC 0x06 for bass speaker 0x17; it has no volume control */ static void alc289_fixup_asus_ga401(struct hda_codec *codec, const struct hda_fixup *fix, int action) @@ -7970,6 +7989,7 @@ enum { ALC294_FIXUP_BASS_SPEAKER_15, ALC283_FIXUP_DELL_HP_RESUME, ALC294_FIXUP_ASUS_CS35L41_SPI_2, + ALC274_FIXUP_HP_AIO_BIND_DACS, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -10340,6 +10360,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC, }, + [ALC274_FIXUP_HP_AIO_BIND_DACS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc274_fixup_hp_aio_bind_dacs, + }, }; static const struct hda_quirk alc269_fixup_tbl[] = { @@ -10774,6 +10798,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d18, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), SND_PCI_QUIRK(0x103c, 0x8d84, "HP EliteBook X G1i", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 14 G12", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d86, "HP Elite X360 14 G12", ALC285_FIXUP_HP_GPIO_LED), @@ -10793,6 +10818,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8da1, "HP 16 Clipper OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da7, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da8, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8dd4, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), SND_PCI_QUIRK(0x103c, 0x8de8, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8de9, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8dec, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED), From 56651128e2fbad80f632f388d6bf1f39c928267a Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Thu, 3 Apr 2025 18:11:42 +0200 Subject: [PATCH 172/559] MIPS: Fix idle VS timer enqueue MIPS re-enables interrupts on its idle routine and performs a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep. The IRQs firing between the check and the 'wait' instruction may set the TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs interrupting __r4k_wait() rollback their return address to the beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked again before going back to sleep. However idle IRQs can also queue timers that may require a tick reprogramming through a new generic idle loop iteration but those timers would go unnoticed here because __r4k_wait() only checks TIF_NEED_RESCHED. It doesn't check for pending timers. Fix this with fast-forwarding idle IRQs return address to the end of the idle routine instead of the beginning, so that the generic idle loop handles both TIF_NEED_RESCHED and pending timers. CONFIG_CPU_MICROMIPS has been removed along with the nop instructions. There, NOPs are 2 byte in size, so change the code with 3 _ssnop which are always 4 byte and remove the ifdef. Added ehb to make sure the hazard is always cleared. Fixes: c65a5480ff29 ("[MIPS] Fix potential latency problem due to non-atomic cpu_wait.") Signed-off-by: Marco Crivellari Signed-off-by: Maciej W. Rozycki Acked-by: Frederic Weisbecker Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/idle.h | 3 +- arch/mips/kernel/genex.S | 62 +++++++++++++++++++++--------------- arch/mips/kernel/idle.c | 7 ---- 3 files changed, 37 insertions(+), 35 deletions(-) diff --git a/arch/mips/include/asm/idle.h b/arch/mips/include/asm/idle.h index 0992cad9c632..2bc3678455ed 100644 --- a/arch/mips/include/asm/idle.h +++ b/arch/mips/include/asm/idle.h @@ -6,8 +6,7 @@ #include extern void (*cpu_wait)(void); -extern void r4k_wait(void); -extern asmlinkage void __r4k_wait(void); +extern asmlinkage void r4k_wait(void); extern void r4k_wait_irqoff(void); static inline int using_rollback_handler(void) diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index a572ce36a24f..46d975d00298 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -104,42 +104,52 @@ handle_vcei: __FINIT - .align 5 /* 32 byte rollback region */ -LEAF(__r4k_wait) - .set push - .set noreorder - /* start of rollback region */ - LONG_L t0, TI_FLAGS($28) - nop - andi t0, _TIF_NEED_RESCHED - bnez t0, 1f - nop - nop - nop -#ifdef CONFIG_CPU_MICROMIPS - nop - nop - nop - nop -#endif + /* Align to 32 bytes for the maximum idle interrupt region size. */ + .align 5 +LEAF(r4k_wait) + /* Keep the ISA bit clear for calculations on local labels here. */ +0: .fill 0 + /* Start of idle interrupt region. */ + local_irq_enable + /* + * If an interrupt lands here, before going idle on the next + * instruction, we must *NOT* go idle since the interrupt could + * have set TIF_NEED_RESCHED or caused a timer to need resched. + * Fall through -- see rollback_handler below -- and have the + * idle loop take care of things. + */ +1: .fill 0 + /* The R2 EI/EHB sequence takes 8 bytes, otherwise pad up. */ + .if 1b - 0b > 32 + .error "overlong idle interrupt region" + .elseif 1b - 0b > 8 + .align 4 + .endif +2: .fill 0 + .equ r4k_wait_idle_size, 2b - 0b + /* End of idle interrupt region; size has to be a power of 2. */ .set MIPS_ISA_ARCH_LEVEL_RAW +r4k_wait_insn: wait - /* end of rollback region (the region size must be power of two) */ -1: +r4k_wait_exit: + .set mips0 + local_irq_disable jr ra - nop - .set pop - END(__r4k_wait) + END(r4k_wait) + .previous .macro BUILD_ROLLBACK_PROLOGUE handler FEXPORT(rollback_\handler) .set push .set noat MFC0 k0, CP0_EPC - PTR_LA k1, __r4k_wait - ori k0, 0x1f /* 32 byte rollback region */ - xori k0, 0x1f + /* Subtract/add 2 to let the ISA bit propagate through the mask. */ + PTR_LA k1, r4k_wait_insn - 2 + ori k0, r4k_wait_idle_size - 2 + .set noreorder bne k0, k1, \handler + PTR_ADDIU k0, r4k_wait_exit - r4k_wait_insn + 2 + .set reorder MTC0 k0, CP0_EPC .set pop .endm diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 5abc8b7340f8..80e8a04a642e 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -35,13 +35,6 @@ static void __cpuidle r3081_wait(void) write_c0_conf(cfg | R30XX_CONF_HALT); } -void __cpuidle r4k_wait(void) -{ - raw_local_irq_enable(); - __r4k_wait(); - raw_local_irq_disable(); -} - /* * This variant is preferable as it allows testing need_resched and going to * sleep depending on the outcome atomically. Unfortunately the "It is From b713f27e32d87c35737ec942dd6f5ed6b7475f48 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Thu, 3 Apr 2025 18:11:43 +0200 Subject: [PATCH 173/559] MIPS: Move r4k_wait() to .cpuidle.text section Fix missing .cpuidle.text section assignment for r4k_wait() to correct backtracing with nmi_backtrace(). Fixes: 97c8580e85cf ("MIPS: Annotate cpu_wait implementations with __cpuidle") Signed-off-by: Marco Crivellari Signed-off-by: Maciej W. Rozycki Acked-by: Frederic Weisbecker Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/genex.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 46d975d00298..2cf312d9a3b0 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -104,6 +104,7 @@ handle_vcei: __FINIT + .section .cpuidle.text,"ax" /* Align to 32 bytes for the maximum idle interrupt region size. */ .align 5 LEAF(r4k_wait) From be0c40da888840fe91b45474cb70779e6cbaf7ca Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 27 Apr 2025 10:10:34 +0200 Subject: [PATCH 174/559] ALSA: hda/realtek: Add quirk for HP Spectre x360 15-df1xxx HP Spectre x360 15-df1xxx with SSID 13c:863e requires similar workarounds that were applied to another HP Spectre x360 models; it has a mute LED only, no micmute LEDs, and needs the speaker GPIO seup. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220054 Link: https://patch.msgid.link/20250427081035.11567-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 38ed264c3a49..1799203af35a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6982,6 +6982,41 @@ static void alc285_fixup_hp_spectre_x360_eb1(struct hda_codec *codec, } } +/* GPIO1 = amplifier on/off */ +static void alc285_fixup_hp_spectre_x360_df1(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + struct alc_spec *spec = codec->spec; + static const hda_nid_t conn[] = { 0x02 }; + static const struct hda_pintbl pincfgs[] = { + { 0x14, 0x90170110 }, /* front/high speakers */ + { 0x17, 0x90170130 }, /* back/bass speakers */ + { } + }; + + // enable mute led + alc285_fixup_hp_mute_led_coefbit(codec, fix, action); + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + /* needed for amp of back speakers */ + spec->gpio_mask |= 0x01; + spec->gpio_dir |= 0x01; + snd_hda_apply_pincfgs(codec, pincfgs); + /* share DAC to have unified volume control */ + snd_hda_override_conn_list(codec, 0x14, ARRAY_SIZE(conn), conn); + snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); + break; + case HDA_FIXUP_ACT_INIT: + /* need to toggle GPIO to enable the amp of back speakers */ + alc_update_gpio_data(codec, 0x01, true); + msleep(100); + alc_update_gpio_data(codec, 0x01, false); + break; + } +} + static void alc285_fixup_hp_spectre_x360(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -7780,6 +7815,7 @@ enum { ALC280_FIXUP_HP_9480M, ALC245_FIXUP_HP_X360_AMP, ALC285_FIXUP_HP_SPECTRE_X360_EB1, + ALC285_FIXUP_HP_SPECTRE_X360_DF1, ALC285_FIXUP_HP_ENVY_X360, ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -9857,6 +9893,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_spectre_x360_eb1 }, + [ALC285_FIXUP_HP_SPECTRE_X360_DF1] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_spectre_x360_df1 + }, [ALC285_FIXUP_HP_ENVY_X360] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_envy_x360, @@ -10588,6 +10628,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x86c1, "HP Laptop 15-da3001TU", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), + SND_PCI_QUIRK(0x103c, 0x863e, "HP Spectre x360 15-df1xxx", ALC285_FIXUP_HP_SPECTRE_X360_DF1), SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x86f9, "HP Spectre x360 13-aw0xxx", ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), @@ -11520,6 +11561,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360_EB1, .name = "alc285-hp-spectre-x360-eb1"}, + {.id = ALC285_FIXUP_HP_SPECTRE_X360_DF1, .name = "alc285-hp-spectre-x360-df1"}, {.id = ALC285_FIXUP_HP_ENVY_X360, .name = "alc285-hp-envy-x360"}, {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, {.id = ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN, .name = "alc287-yoga9-bass-spk-pin"}, From cc3e3d3a9d09456cf21694b7ea8b9d781e85fda3 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Sat, 5 Apr 2025 16:37:05 +0200 Subject: [PATCH 175/559] MIPS: rename rollback_handler with skipover_handler Recently the rollback region has been changed into an idle interrupt region [1]. This patch make the appropriate changes renaming functions and macro, to reflect the change. [1] https://lore.kernel.org/linux-mips/20250403161143.361461-2-marco.crivellari@suse.com/ Signed-off-by: Marco Crivellari Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/idle.h | 2 +- arch/mips/kernel/genex.S | 10 +++++----- arch/mips/kernel/traps.c | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/mips/include/asm/idle.h b/arch/mips/include/asm/idle.h index 2bc3678455ed..c7d75807d13f 100644 --- a/arch/mips/include/asm/idle.h +++ b/arch/mips/include/asm/idle.h @@ -9,7 +9,7 @@ extern void (*cpu_wait)(void); extern asmlinkage void r4k_wait(void); extern void r4k_wait_irqoff(void); -static inline int using_rollback_handler(void) +static inline int using_skipover_handler(void) { return cpu_wait == r4k_wait; } diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 2cf312d9a3b0..08c0a01d9a29 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -116,7 +116,7 @@ LEAF(r4k_wait) * If an interrupt lands here, before going idle on the next * instruction, we must *NOT* go idle since the interrupt could * have set TIF_NEED_RESCHED or caused a timer to need resched. - * Fall through -- see rollback_handler below -- and have the + * Fall through -- see skipover_handler below -- and have the * idle loop take care of things. */ 1: .fill 0 @@ -139,8 +139,8 @@ r4k_wait_exit: END(r4k_wait) .previous - .macro BUILD_ROLLBACK_PROLOGUE handler - FEXPORT(rollback_\handler) + .macro BUILD_SKIPOVER_PROLOGUE handler + FEXPORT(skipover_\handler) .set push .set noat MFC0 k0, CP0_EPC @@ -156,7 +156,7 @@ r4k_wait_exit: .endm .align 5 -BUILD_ROLLBACK_PROLOGUE handle_int +BUILD_SKIPOVER_PROLOGUE handle_int NESTED(handle_int, PT_SIZE, sp) .cfi_signal_frame #ifdef CONFIG_TRACE_IRQFLAGS @@ -276,7 +276,7 @@ NESTED(except_vec_ejtag_debug, 0, sp) * This prototype is copied to ebase + n*IntCtl.VS and patched * to invoke the handler */ -BUILD_ROLLBACK_PROLOGUE except_vec_vi +BUILD_SKIPOVER_PROLOGUE except_vec_vi NESTED(except_vec_vi, 0, sp) SAVE_SOME docfi=1 SAVE_AT docfi=1 diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 39e248d0ed59..8ec1e185b35c 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -77,7 +77,7 @@ #include "access-helper.h" extern void check_wait(void); -extern asmlinkage void rollback_handle_int(void); +extern asmlinkage void skipover_handle_int(void); extern asmlinkage void handle_int(void); extern asmlinkage void handle_adel(void); extern asmlinkage void handle_ades(void); @@ -2066,7 +2066,7 @@ void *set_vi_handler(int n, vi_handler_t addr) { extern const u8 except_vec_vi[]; extern const u8 except_vec_vi_ori[], except_vec_vi_end[]; - extern const u8 rollback_except_vec_vi[]; + extern const u8 skipover_except_vec_vi[]; unsigned long handler; unsigned long old_handler = vi_handlers[n]; int srssets = current_cpu_data.srsets; @@ -2095,7 +2095,7 @@ void *set_vi_handler(int n, vi_handler_t addr) change_c0_srsmap(0xf << n*4, 0 << n*4); } - vec_start = using_rollback_handler() ? rollback_except_vec_vi : + vec_start = using_skipover_handler() ? skipover_except_vec_vi : except_vec_vi; #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN) ori_offset = except_vec_vi_ori - vec_start + 2; @@ -2426,8 +2426,8 @@ void __init trap_init(void) if (board_be_init) board_be_init(); - set_except_vector(EXCCODE_INT, using_rollback_handler() ? - rollback_handle_int : handle_int); + set_except_vector(EXCCODE_INT, using_skipover_handler() ? + skipover_handle_int : handle_int); set_except_vector(EXCCODE_MOD, handle_tlbm); set_except_vector(EXCCODE_TLBL, handle_tlbl); set_except_vector(EXCCODE_TLBS, handle_tlbs); From 7f74c066e5d920b3a2f0f936060984e3b3709250 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 7 Apr 2025 18:32:21 +0200 Subject: [PATCH 176/559] MIPS: CPS: Fix potential NULL pointer dereferences in cps_prepare_cpus() Check the return values of kcalloc() and exit early to avoid potential NULL pointer dereferences. Compile-tested only. Cc: stable@vger.kernel.org Fixes: 75fa6a583882e ("MIPS: CPS: Introduce struct cluster_boot_config") Fixes: 0856c143e1cd3 ("MIPS: CPS: Boot CPUs in secondary clusters") Signed-off-by: Thorsten Blum Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/smp-cps.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index e85bd087467e..cc26d56f3ab6 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -332,6 +332,8 @@ static void __init cps_prepare_cpus(unsigned int max_cpus) mips_cps_cluster_bootcfg = kcalloc(nclusters, sizeof(*mips_cps_cluster_bootcfg), GFP_KERNEL); + if (!mips_cps_cluster_bootcfg) + goto err_out; if (nclusters > 1) mips_cm_update_property(); @@ -348,6 +350,8 @@ static void __init cps_prepare_cpus(unsigned int max_cpus) mips_cps_cluster_bootcfg[cl].core_power = kcalloc(BITS_TO_LONGS(ncores), sizeof(unsigned long), GFP_KERNEL); + if (!mips_cps_cluster_bootcfg[cl].core_power) + goto err_out; /* Allocate VPE boot configuration structs */ for (c = 0; c < ncores; c++) { From 5591ce0069ddda97cdbbea596bed53e698f399c2 Mon Sep 17 00:00:00 2001 From: Wojciech Dubowik Date: Thu, 24 Apr 2025 11:59:14 +0200 Subject: [PATCH 177/559] arm64: dts: imx8mm-verdin: Link reg_usdhc2_vqmmc to usdhc2 Define vqmmc regulator-gpio for usdhc2 with vin-supply coming from LDO5. Without this definition LDO5 will be powered down, disabling SD card after bootup. This has been introduced in commit f5aab0438ef1 ("regulator: pca9450: Fix enable register for LDO5"). Fixes: 6a57f224f734 ("arm64: dts: freescale: add initial support for verdin imx8m mini") Fixes: f5aab0438ef1 ("regulator: pca9450: Fix enable register for LDO5") Tested-by: Manuel Traut Reviewed-by: Philippe Schenker Tested-by: Francesco Dolcini Reviewed-by: Francesco Dolcini Cc: stable@vger.kernel.org Signed-off-by: Wojciech Dubowik Signed-off-by: Shawn Guo --- .../boot/dts/freescale/imx8mm-verdin.dtsi | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi index 7251ad3a0017..b46566f3ce20 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi @@ -144,6 +144,19 @@ startup-delay-us = <20000>; }; + reg_usdhc2_vqmmc: regulator-usdhc2-vqmmc { + compatible = "regulator-gpio"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usdhc2_vsel>; + gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>; + regulator-max-microvolt = <3300000>; + regulator-min-microvolt = <1800000>; + states = <1800000 0x1>, + <3300000 0x0>; + regulator-name = "PMIC_USDHC_VSELECT"; + vin-supply = <®_nvcc_sd>; + }; + reserved-memory { #address-cells = <2>; #size-cells = <2>; @@ -269,7 +282,7 @@ "SODIMM_19", "", "", - "", + "PMIC_USDHC_VSELECT", "", "", "", @@ -785,6 +798,7 @@ pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_cd>; pinctrl-3 = <&pinctrl_usdhc2_sleep>, <&pinctrl_usdhc2_cd_sleep>; vmmc-supply = <®_usdhc2_vmmc>; + vqmmc-supply = <®_usdhc2_vqmmc>; }; &wdog1 { @@ -1206,13 +1220,17 @@ ; /* SODIMM 76 */ }; + pinctrl_usdhc2_vsel: usdhc2vselgrp { + fsl,pins = + ; /* PMIC_USDHC_VSELECT */ + }; + /* * Note: Due to ERR050080 we use discrete external on-module resistors pulling-up to the * on-module +V3.3_1.8_SD (LDO5) rail and explicitly disable the internal pull-ups here. */ pinctrl_usdhc2: usdhc2grp { fsl,pins = - , , /* SODIMM 78 */ , /* SODIMM 74 */ , /* SODIMM 80 */ @@ -1223,7 +1241,6 @@ pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { fsl,pins = - , , , , @@ -1234,7 +1251,6 @@ pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { fsl,pins = - , , , , @@ -1246,7 +1262,6 @@ /* Avoid backfeeding with removed card power */ pinctrl_usdhc2_sleep: usdhc2slpgrp { fsl,pins = - , , , , From 1149719442d28c96dc63cad432b5a6db7c300e1a Mon Sep 17 00:00:00 2001 From: Joachim Priesner Date: Mon, 28 Apr 2025 07:36:06 +0200 Subject: [PATCH 178/559] ALSA: usb-audio: Add second USB ID for Jabra Evolve 65 headset There seem to be multiple USB device IDs used for these; the one I have reports as 0b0e:030c when powered on. (When powered off, it reports as 0b0e:0311.) Signed-off-by: Joachim Priesner Cc: Link: https://patch.msgid.link/20250428053606.9237-1-joachim.priesner@web.de Signed-off-by: Takashi Iwai --- sound/usb/format.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/format.c b/sound/usb/format.c index 9d32b21a5fbb..0ba4641a0eb1 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -260,7 +260,8 @@ static int parse_audio_format_rates_v1(struct snd_usb_audio *chip, struct audiof } /* Jabra Evolve 65 headset */ - if (chip->usb_id == USB_ID(0x0b0e, 0x030b)) { + if (chip->usb_id == USB_ID(0x0b0e, 0x030b) || + chip->usb_id == USB_ID(0x0b0e, 0x030c)) { /* only 48kHz for playback while keeping 16kHz for capture */ if (fp->nr_rates != 1) return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000); From 76047483fe94414edf409dc498498abf346e22f1 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 23 Apr 2025 09:54:42 +0530 Subject: [PATCH 179/559] drm/ttm: fix the warning for hit_low and evict_low MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix the below warning messages: ttm/ttm_bo.c:1098: warning: Function parameter or struct member 'hit_low' not described in 'ttm_bo_swapout_walk' ttm/ttm_bo.c:1098: warning: Function parameter or struct member 'evict_low' not described in 'ttm_bo_swapout_walk' Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Signed-off-by: Sunil Khatri Reviewed-by: Maarten Lankhorst Signed-off-by: Christian König Link: https://lore.kernel.org/r/20250423042442.762108-1-sunil.khatri@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 95b86003c50d..5bf3c969907c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1093,7 +1093,8 @@ struct ttm_bo_swapout_walk { struct ttm_lru_walk walk; /** @gfp_flags: The gfp flags to use for ttm_tt_swapout() */ gfp_t gfp_flags; - + /** @hit_low: Whether we should attempt to swap BO's with low watermark threshold */ + /** @evict_low: If we cannot swap a bo when @try_low is false (first pass) */ bool hit_low, evict_low; }; From b71f9804f66c2592d4c3a2397b7374a4039005a5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 18 Apr 2025 22:46:52 -0700 Subject: [PATCH 180/559] timekeeping: Prevent coarse clocks going backwards Lei Chen raised an issue with CLOCK_MONOTONIC_COARSE seeing time inconsistencies. Lei tracked down that this was being caused by the adjustment: tk->tkr_mono.xtime_nsec -= offset; which is made to compensate for the unaccumulated cycles in offset when the multiplicator is adjusted forward, so that the non-_COARSE clockids don't see inconsistencies. However, the _COARSE clockid getter functions use the adjusted xtime_nsec value directly and do not compensate the negative offset via the clocksource delta multiplied with the new multiplicator. In that case the caller can observe time going backwards in consecutive calls. By design, this negative adjustment should be fine, because the logic run from timekeeping_adjust() is done after it accumulated approximately multiplicator * interval_cycles into xtime_nsec. The accumulated value is always larger then the mult_adj * offset value, which is subtracted from xtime_nsec. Both operations are done together under the tk_core.lock, so the net change to xtime_nsec is always always be positive. However, do_adjtimex() calls into timekeeping_advance() as well, to apply the NTP frequency adjustment immediately. In this case, timekeeping_advance() does not return early when the offset is smaller then interval_cycles. In that case there is no time accumulated into xtime_nsec. But the subsequent call into timekeeping_adjust(), which modifies the multiplicator, subtracts from xtime_nsec to correct for the new multiplicator. Here because there was no accumulation, xtime_nsec becomes smaller than before, which opens a window up to the next accumulation, where the _COARSE clockid getters, which don't compensate for the offset, can observe the inconsistency. This has been tried to be fixed by forwarding the timekeeper in the case that adjtimex() adjusts the multiplier, which resets the offset to zero: 757b000f7b93 ("timekeeping: Fix possible inconsistencies in _COARSE clockids") That works correctly, but unfortunately causes a regression on the adjtimex() side. There are two issues: 1) The forwarding of the base time moves the update out of the original period and establishes a new one. 2) The clearing of the accumulated NTP error is changing the behaviour as well. User-space expects that multiplier/frequency updates are in effect, when the syscall returns, so delaying the update to the next tick is not solving the problem either. Commit 757b000f7b93 was reverted so that the established expectations of user space implementations (ntpd, chronyd) are restored, but that obviously brought the inconsistencies back. One of the initial approaches to fix this was to establish a separate storage for the coarse time getter nanoseconds part by calculating it from the offset. That was dropped on the floor because not having yet another state to maintain was simpler. But given the result of the above exercise, this solution turns out to be the right one. Bring it back in a slightly modified form. Thus introduce timekeeper::coarse_nsec and store that nanoseconds part in it, switch the time getter functions and the VDSO update to use that value. coarse_nsec is set on operations which forward or initialize the timekeeper and after time was accumulated during a tick. If there is no accumulation the timestamp is unchanged. This leaves the adjtimex() behaviour unmodified and prevents coarse time from going backwards. [ jstultz: Simplified the coarse_nsec calculation and kept behavior so coarse clockids aren't adjusted on each inter-tick adjtimex call, slightly reworked the comments and commit message ] Fixes: da15cfdae033 ("time: Introduce CLOCK_REALTIME_COARSE") Reported-by: Lei Chen Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/20250419054706.2319105-1-jstultz@google.com Closes: https://lore.kernel.org/lkml/20250310030004.3705801-1-lei.chen@smartx.com/ --- include/linux/timekeeper_internal.h | 8 +++-- kernel/time/timekeeping.c | 50 ++++++++++++++++++++++++----- kernel/time/vsyscall.c | 4 +-- 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index e39d4d563b19..785048a3b3e6 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -51,7 +51,7 @@ struct tk_read_base { * @offs_real: Offset clock monotonic -> clock realtime * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai - * @tai_offset: The current UTC to TAI offset in seconds + * @coarse_nsec: The nanoseconds part for coarse time getters * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds * @clock_was_set_seq: The sequence number of clock was set events @@ -76,6 +76,7 @@ struct tk_read_base { * ntp shifted nano seconds. * @ntp_err_mult: Multiplication factor for scaled math conversion * @skip_second_overflow: Flag used to avoid updating NTP twice with same second + * @tai_offset: The current UTC to TAI offset in seconds * * Note: For timespec(64) based interfaces wall_to_monotonic is what * we need to add to xtime (or xtime corrected for sub jiffy times) @@ -100,7 +101,7 @@ struct tk_read_base { * which results in the following cacheline layout: * * 0: seqcount, tkr_mono - * 1: xtime_sec ... tai_offset + * 1: xtime_sec ... coarse_nsec * 2: tkr_raw, raw_sec * 3,4: Internal variables * @@ -121,7 +122,7 @@ struct timekeeper { ktime_t offs_real; ktime_t offs_boot; ktime_t offs_tai; - s32 tai_offset; + u32 coarse_nsec; /* Cacheline 2: */ struct tk_read_base tkr_raw; @@ -144,6 +145,7 @@ struct timekeeper { u32 ntp_error_shift; u32 ntp_err_mult; u32 skip_second_overflow; + s32 tai_offset; }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 1e67d076f195..a009c91f7b05 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -164,10 +164,34 @@ static inline struct timespec64 tk_xtime(const struct timekeeper *tk) return ts; } +static inline struct timespec64 tk_xtime_coarse(const struct timekeeper *tk) +{ + struct timespec64 ts; + + ts.tv_sec = tk->xtime_sec; + ts.tv_nsec = tk->coarse_nsec; + return ts; +} + +/* + * Update the nanoseconds part for the coarse time keepers. They can't rely + * on xtime_nsec because xtime_nsec could be adjusted by a small negative + * amount when the multiplication factor of the clock is adjusted, which + * could cause the coarse clocks to go slightly backwards. See + * timekeeping_apply_adjustment(). Thus we keep a separate copy for the coarse + * clockids which only is updated when the clock has been set or we have + * accumulated time. + */ +static inline void tk_update_coarse_nsecs(struct timekeeper *tk) +{ + tk->coarse_nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; +} + static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec = ts->tv_sec; tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift; + tk_update_coarse_nsecs(tk); } static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) @@ -175,6 +199,7 @@ static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) tk->xtime_sec += ts->tv_sec; tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift; tk_normalize_xtime(tk); + tk_update_coarse_nsecs(tk); } static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) @@ -708,6 +733,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) tk_normalize_xtime(tk); delta -= incr; } + tk_update_coarse_nsecs(tk); } /** @@ -804,8 +830,8 @@ EXPORT_SYMBOL_GPL(ktime_get_with_offset); ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs) { struct timekeeper *tk = &tk_core.timekeeper; - unsigned int seq; ktime_t base, *offset = offsets[offs]; + unsigned int seq; u64 nsecs; WARN_ON(timekeeping_suspended); @@ -813,7 +839,7 @@ ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs) do { seq = read_seqcount_begin(&tk_core.seq); base = ktime_add(tk->tkr_mono.base, *offset); - nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + nsecs = tk->coarse_nsec; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -2161,7 +2187,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) struct timekeeper *real_tk = &tk_core.timekeeper; unsigned int clock_set = 0; int shift = 0, maxshift; - u64 offset; + u64 offset, orig_offset; guard(raw_spinlock_irqsave)(&tk_core.lock); @@ -2172,7 +2198,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), tk->tkr_mono.cycle_last, tk->tkr_mono.mask, tk->tkr_mono.clock->max_raw_delta); - + orig_offset = offset; /* Check if there's really nothing to do */ if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK) return false; @@ -2205,6 +2231,14 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) */ clock_set |= accumulate_nsecs_to_secs(tk); + /* + * To avoid inconsistencies caused adjtimex TK_ADV_FREQ calls + * making small negative adjustments to the base xtime_nsec + * value, only update the coarse clocks if we accumulated time + */ + if (orig_offset != offset) + tk_update_coarse_nsecs(tk); + timekeeping_update_from_shadow(&tk_core, clock_set); return !!clock_set; @@ -2248,7 +2282,7 @@ void ktime_get_coarse_real_ts64(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); - *ts = tk_xtime(tk); + *ts = tk_xtime_coarse(tk); } while (read_seqcount_retry(&tk_core.seq, seq)); } EXPORT_SYMBOL(ktime_get_coarse_real_ts64); @@ -2271,7 +2305,7 @@ void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); - *ts = tk_xtime(tk); + *ts = tk_xtime_coarse(tk); offset = tk_core.timekeeper.offs_real; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -2350,12 +2384,12 @@ void ktime_get_coarse_ts64(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); - now = tk_xtime(tk); + now = tk_xtime_coarse(tk); mono = tk->wall_to_monotonic; } while (read_seqcount_retry(&tk_core.seq, seq)); set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec, - now.tv_nsec + mono.tv_nsec); + now.tv_nsec + mono.tv_nsec); } EXPORT_SYMBOL(ktime_get_coarse_ts64); diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 01c2ab1e8971..32ef27c71b57 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -98,12 +98,12 @@ void update_vsyscall(struct timekeeper *tk) /* CLOCK_REALTIME_COARSE */ vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE]; vdso_ts->sec = tk->xtime_sec; - vdso_ts->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + vdso_ts->nsec = tk->coarse_nsec; /* CLOCK_MONOTONIC_COARSE */ vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE]; vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + nsec = tk->coarse_nsec; nsec = nsec + tk->wall_to_monotonic.tv_nsec; vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec); From 8d16dd7b651b75ce7c79da3539553a25e60668f5 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 24 Apr 2025 11:06:53 +0800 Subject: [PATCH 181/559] MAINTAINERS: erofs: add myself as reviewer I have a solid background in file systems and since much of my recent work has focused on EROFS, I am familiar with it. Now I have the time and am willing to help review EROFS patches. I hope my participation can be helpful to the EROFS patch review process. Signed-off-by: Hongbo Li Acked-by: Chao Yu Acked-by: Gao Xiang Link: https://lore.kernel.org/r/20250424030653.3308358-1-lihongbo22@huawei.com Signed-off-by: Gao Xiang --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3cbf9ac0d83f..b53a268e4a92 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8726,6 +8726,7 @@ M: Chao Yu R: Yue Hu R: Jeffle Xu R: Sandeep Dhavale +R: Hongbo Li L: linux-erofs@lists.ozlabs.org S: Maintained W: https://erofs.docs.kernel.org From 4fb7b8fceb0beebbe00712c3daf49ade0386076a Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Fri, 25 Apr 2025 07:26:39 -0700 Subject: [PATCH 182/559] EDAC/altera: Test the correct error reg offset Test correct structure member, ecc_cecnt_offset, before using it. [ bp: Massage commit message. ] Fixes: 73bcc942f427 ("EDAC, altera: Add Arria10 EDAC support") Signed-off-by: Niravkumar L Rabara Signed-off-by: Matthew Gerlach Signed-off-by: Borislav Petkov (AMD) Acked-by: Dinh Nguyen Cc: stable@kernel.org Link: https://lore.kernel.org/20250425142640.33125-2-matthew.gerlach@altera.com --- drivers/edac/altera_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 3e971f902363..88d9d2f458ee 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -99,7 +99,7 @@ static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id) if (status & priv->ecc_stat_ce_mask) { regmap_read(drvdata->mc_vbase, priv->ecc_saddr_offset, &err_addr); - if (priv->ecc_uecnt_offset) + if (priv->ecc_cecnt_offset) regmap_read(drvdata->mc_vbase, priv->ecc_cecnt_offset, &err_count); edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count, From 6dbe3c5418c4368e824bff6ae4889257dd544892 Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Fri, 25 Apr 2025 07:26:40 -0700 Subject: [PATCH 183/559] EDAC/altera: Set DDR and SDMMC interrupt mask before registration Mask DDR and SDMMC in probe function to avoid spurious interrupts before registration. Removed invalid register write to system manager. Fixes: 1166fde93d5b ("EDAC, altera: Add Arria10 ECC memory init functions") Signed-off-by: Niravkumar L Rabara Signed-off-by: Matthew Gerlach Signed-off-by: Borislav Petkov (AMD) Acked-by: Dinh Nguyen Cc: stable@kernel.org Link: https://lore.kernel.org/20250425142640.33125-3-matthew.gerlach@altera.com --- drivers/edac/altera_edac.c | 7 ++++--- drivers/edac/altera_edac.h | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 88d9d2f458ee..dcd7008fe06b 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1005,9 +1005,6 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, } } - /* Interrupt mode set to every SBERR */ - regmap_write(ecc_mgr_map, ALTR_A10_ECC_INTMODE_OFST, - ALTR_A10_ECC_INTMODE); /* Enable ECC */ ecc_set_bits(ecc_ctrl_en_mask, (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); @@ -2127,6 +2124,10 @@ static int altr_edac_a10_probe(struct platform_device *pdev) return PTR_ERR(edac->ecc_mgr_map); } + /* Set irq mask for DDR SBE to avoid any pending irq before registration */ + regmap_write(edac->ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_SET_OFST, + (A10_SYSMGR_ECC_INTMASK_SDMMCB | A10_SYSMGR_ECC_INTMASK_DDR0)); + edac->irq_chip.name = pdev->dev.of_node->name; edac->irq_chip.irq_mask = a10_eccmgr_irq_mask; edac->irq_chip.irq_unmask = a10_eccmgr_irq_unmask; diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 3727e72c8c2e..7248d24c4908 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -249,6 +249,8 @@ struct altr_sdram_mc_data { #define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94 #define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 #define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1) +#define A10_SYSMGR_ECC_INTMASK_SDMMCB BIT(16) +#define A10_SYSMGR_ECC_INTMASK_DDR0 BIT(17) #define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C #define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 From 2c8a7c66c90832432496616a9a3c07293f1364f3 Mon Sep 17 00:00:00 2001 From: Mingcong Bai Date: Fri, 18 Apr 2025 11:16:42 +0800 Subject: [PATCH 184/559] iommu/vt-d: Apply quirk_iommu_igfx for 8086:0044 (QM57/QS57) On the Lenovo ThinkPad X201, when Intel VT-d is enabled in the BIOS, the kernel boots with errors related to DMAR, the graphical interface appeared quite choppy, and the system resets erratically within a minute after it booted: DMAR: DRHD: handling fault status reg 3 DMAR: [DMA Write NO_PASID] Request device [00:02.0] fault addr 0xb97ff000 [fault reason 0x05] PTE Write access is not set Upon comparing boot logs with VT-d on/off, I found that the Intel Calpella quirk (`quirk_calpella_no_shadow_gtt()') correctly applied the igfx IOMMU disable/quirk correctly: pci 0000:00:00.0: DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics Whereas with VT-d on, it went into the "else" branch, which then triggered the DMAR handling fault above: ... else if (!disable_igfx_iommu) { /* we have to ensure the gfx device is idle before we flush */ pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n"); iommu_set_dma_strict(); } Now, this is not exactly scientific, but moving 0x0044 to quirk_iommu_igfx seems to have fixed the aforementioned issue. Running a few `git blame' runs on the function, I have found that the quirk was originally introduced as a fix specific to ThinkPad X201: commit 9eecabcb9a92 ("intel-iommu: Abort IOMMU setup for igfx if BIOS gave no shadow GTT space") Which was later revised twice to the "else" branch we saw above: - 2011: commit 6fbcfb3e467a ("intel-iommu: Workaround IOTLB hang on Ironlake GPU") - 2024: commit ba00196ca41c ("iommu/vt-d: Decouple igfx_off from graphic identity mapping") I'm uncertain whether further testings on this particular laptops were done in 2011 and (honestly I'm not sure) 2024, but I would be happy to do some distro-specific testing if that's what would be required to verify this patch. P.S., I also see IDs 0x0040, 0x0062, and 0x006a listed under the same `quirk_calpella_no_shadow_gtt()' quirk, but I'm not sure how similar these chipsets are (if they share the same issue with VT-d or even, indeed, if this issue is specific to a bug in the Lenovo BIOS). With regards to 0x0062, it seems to be a Centrino wireless card, but not a chipset? I have also listed a couple (distro and kernel) bug reports below as references (some of them are from 7-8 years ago!), as they seem to be similar issue found on different Westmere/Ironlake, Haswell, and Broadwell hardware setups. Cc: stable@vger.kernel.org Fixes: 6fbcfb3e467a ("intel-iommu: Workaround IOTLB hang on Ironlake GPU") Fixes: ba00196ca41c ("iommu/vt-d: Decouple igfx_off from graphic identity mapping") Link: https://groups.google.com/g/qubes-users/c/4NP4goUds2c?pli=1 Link: https://bugs.archlinux.org/task/65362 Link: https://bbs.archlinux.org/viewtopic.php?id=230323 Reported-by: Wenhao Sun Closes: https://bugzilla.kernel.org/show_bug.cgi?id=197029 Signed-off-by: Mingcong Bai Link: https://lore.kernel.org/r/20250415133330.12528-1-jeffbai@aosc.io Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index e60b699e7b8c..cb0b993bebb4 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4439,6 +4439,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); +/* QM57/QS57 integrated gfx malfunctions with dmar */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx); + /* Broadwell igfx malfunctions with dmar */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); @@ -4516,7 +4519,6 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); From 5a2a6c428190f945c5cbf5791f72dbea83e97f66 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 15 Apr 2025 00:17:16 -0400 Subject: [PATCH 185/559] dm: always update the array size in realloc_argv on success realloc_argv() was only updating the array size if it was called with old_argv already allocated. The first time it was called to create an argv array, it would allocate the array but return the array size as zero. dm_split_args() would think that it couldn't store any arguments in the array and would call realloc_argv() again, causing it to reallocate the initial slots (this time using GPF_KERNEL) and finally return a size. Aside from being wasteful, this could cause deadlocks on targets that need to process messages without starting new IO. Instead, realloc_argv should always update the allocated array size on success. Fixes: a0651926553c ("dm table: don't copy from a NULL pointer in realloc_argv()") Cc: stable@vger.kernel.org Signed-off-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka --- drivers/md/dm-table.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 53759dbbe9d6..9e175c5e0634 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -523,9 +523,10 @@ static char **realloc_argv(unsigned int *size, char **old_argv) gfp = GFP_NOIO; } argv = kmalloc_array(new_size, sizeof(*argv), gfp); - if (argv && old_argv) { - memcpy(argv, old_argv, *size * sizeof(*argv)); + if (argv) { *size = new_size; + if (old_argv) + memcpy(argv, old_argv, *size * sizeof(*argv)); } kfree(old_argv); From b79028039f440e7d2c4df6ab243060c4e3803e84 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 25 Apr 2025 13:36:21 +0200 Subject: [PATCH 186/559] cpufreq: Fix setting policy limits when frequency tables are used Commit 7491cdf46b5c ("cpufreq: Avoid using inconsistent policy->min and policy->max") overlooked the fact that policy->min and policy->max were accessed directly in cpufreq_frequency_table_target() and in the functions called by it. Consequently, the changes made by that commit led to problems with setting policy limits. Address this by passing the target frequency limits to __resolve_freq() and cpufreq_frequency_table_target() and propagating them to the functions called by the latter. Fixes: 7491cdf46b5c ("cpufreq: Avoid using inconsistent policy->min and policy->max") Cc: 5.16+ # 5.16+ Closes: https://lore.kernel.org/linux-pm/aAplED3IA_J0eZN0@linaro.org/ Reported-by: Stephan Gerhold Signed-off-by: Rafael J. Wysocki Tested-by: Stephan Gerhold Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/5896780.DvuYhMxLoT@rjwysocki.net --- drivers/cpufreq/cpufreq.c | 22 +++++--- drivers/cpufreq/cpufreq_ondemand.c | 3 +- drivers/cpufreq/freq_table.c | 6 +-- include/linux/cpufreq.h | 83 +++++++++++++++++++----------- 4 files changed, 73 insertions(+), 41 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index acf19b0042bb..f45ded62b0e0 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -536,14 +536,18 @@ void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); static unsigned int __resolve_freq(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) + unsigned int target_freq, + unsigned int min, unsigned int max, + unsigned int relation) { unsigned int idx; + target_freq = clamp_val(target_freq, min, max); + if (!policy->freq_table) return target_freq; - idx = cpufreq_frequency_table_target(policy, target_freq, relation); + idx = cpufreq_frequency_table_target(policy, target_freq, min, max, relation); policy->cached_resolved_idx = idx; policy->cached_target_freq = target_freq; return policy->freq_table[idx].frequency; @@ -577,8 +581,7 @@ unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, if (unlikely(min > max)) min = max; - return __resolve_freq(policy, clamp_val(target_freq, min, max), - CPUFREQ_RELATION_LE); + return __resolve_freq(policy, target_freq, min, max, CPUFREQ_RELATION_LE); } EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); @@ -2397,8 +2400,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (cpufreq_disabled()) return -ENODEV; - target_freq = clamp_val(target_freq, policy->min, policy->max); - target_freq = __resolve_freq(policy, target_freq, relation); + target_freq = __resolve_freq(policy, target_freq, policy->min, + policy->max, relation); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", policy->cpu, target_freq, relation, old_target_freq); @@ -2727,8 +2730,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, * compiler optimizations around them because they may be accessed * concurrently by cpufreq_driver_resolve_freq() during the update. */ - WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H)); - new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L); + WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, + new_data.min, new_data.max, + CPUFREQ_RELATION_H)); + new_data.min = __resolve_freq(policy, new_data.min, new_data.min, + new_data.max, CPUFREQ_RELATION_L); WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min); trace_cpu_frequency_limits(policy); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index a7c38b8b3e78..0e65d37c9231 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -76,7 +76,8 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, return freq_next; } - index = cpufreq_frequency_table_target(policy, freq_next, relation); + index = cpufreq_frequency_table_target(policy, freq_next, policy->min, + policy->max, relation); freq_req = freq_table[index].frequency; freq_reduc = freq_req * od_tuners->powersave_bias / 1000; freq_avg = freq_req - freq_reduc; diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index c03a91502f84..35de513af6c9 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -115,8 +115,8 @@ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy) EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation) { struct cpufreq_frequency_table optimal = { .driver_data = ~0, @@ -147,7 +147,7 @@ int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, cpufreq_for_each_valid_entry_idx(pos, table, i) { freq = pos->frequency; - if ((freq < policy->min) || (freq > policy->max)) + if (freq < min || freq > max) continue; if (freq == target_freq) { optimal.driver_data = i; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 400fee6427a5..7a5b391dcc01 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -776,8 +776,8 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation); + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq); @@ -840,12 +840,12 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_al(policy, target_freq, @@ -855,6 +855,14 @@ static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_l(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find highest freq at or below target in a table in ascending order */ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy, unsigned int target_freq, @@ -908,12 +916,12 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ah(policy, target_freq, @@ -923,6 +931,14 @@ static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_h(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find closest freq to target in a table in ascending order */ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, unsigned int target_freq, @@ -993,12 +1009,12 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ac(policy, target_freq, @@ -1008,7 +1024,17 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } -static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_c(policy, target_freq, policy->min, policy->max, efficiencies); +} + +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, + unsigned int min, unsigned int max, + int idx) { unsigned int freq; @@ -1017,11 +1043,13 @@ static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) freq = policy->freq_table[idx].frequency; - return freq == clamp_val(freq, policy->min, policy->max); + return freq == clamp_val(freq, min, max); } static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, + unsigned int min, + unsigned int max, unsigned int relation) { bool efficiencies = policy->efficiencies_available && @@ -1032,29 +1060,26 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, relation &= ~CPUFREQ_RELATION_E; if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)) - return cpufreq_table_index_unsorted(policy, target_freq, - relation); + return cpufreq_table_index_unsorted(policy, target_freq, min, + max, relation); retry: switch (relation) { case CPUFREQ_RELATION_L: - idx = cpufreq_table_find_index_l(policy, target_freq, - efficiencies); + idx = find_index_l(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_H: - idx = cpufreq_table_find_index_h(policy, target_freq, - efficiencies); + idx = find_index_h(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_C: - idx = cpufreq_table_find_index_c(policy, target_freq, - efficiencies); + idx = find_index_c(policy, target_freq, min, max, efficiencies); break; default: WARN_ON_ONCE(1); return 0; } - /* Limit frequency index to honor policy->min/max */ - if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { + /* Limit frequency index to honor min and max */ + if (!cpufreq_is_in_limits(policy, min, max, idx) && efficiencies) { efficiencies = false; goto retry; } From 5b1834d6202f86180e451ad1a2a8a193a1da18fc Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 18 Apr 2025 17:25:12 +0100 Subject: [PATCH 187/559] drm/fdinfo: Protect against driver unbind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we unbind a driver from the PCI device with an active DRM client, subsequent read of the fdinfo data associated with the file descriptor in question will not end well. Protect the path with a drm_dev_enter/exit() pair. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Lucas De Marchi Cc: Rodrigo Vivi Cc: Umesh Nerlige Ramappa Reviewed-by: Christian König Fixes: 3f09a0cd4ea3 ("drm: Add common fdinfo helper") Cc: # v6.5+ Signed-off-by: Christian König Link: https://lore.kernel.org/r/20250418162512.72324-1-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/drm_file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index c299cd94d3f7..cf2463090d3a 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -964,6 +964,10 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) struct drm_file *file = f->private_data; struct drm_device *dev = file->minor->dev; struct drm_printer p = drm_seq_file_printer(m); + int idx; + + if (!drm_dev_enter(dev, &idx)) + return; drm_printf(&p, "drm-driver:\t%s\n", dev->driver->name); drm_printf(&p, "drm-client-id:\t%llu\n", file->client_id); @@ -983,6 +987,8 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) if (dev->driver->show_fdinfo) dev->driver->show_fdinfo(&p, file); + + drm_dev_exit(idx); } EXPORT_SYMBOL(drm_show_fdinfo); From 20a6cff3b283f0601048ace87ad1bc89627e36f2 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Tue, 18 Mar 2025 09:33:33 +0800 Subject: [PATCH 188/559] KVM: x86/mmu: Check and free obsolete roots in kvm_mmu_reload() Check request KVM_REQ_MMU_FREE_OBSOLETE_ROOTS to free obsolete roots in kvm_mmu_reload() to prevent kvm_mmu_reload() from seeing a stale obsolete root. Since kvm_mmu_reload() can be called outside the vcpu_enter_guest() path (e.g., kvm_arch_vcpu_pre_fault_memory()), it may be invoked after a root has been marked obsolete and before vcpu_enter_guest() is invoked to process KVM_REQ_MMU_FREE_OBSOLETE_ROOTS and set root.hpa to invalid. This causes kvm_mmu_reload() to fail to load a new root, which can lead to kvm_arch_vcpu_pre_fault_memory() being stuck in the while loop in kvm_tdp_map_page() since RET_PF_RETRY is always returned due to is_page_fault_stale(). Keep the existing check of KVM_REQ_MMU_FREE_OBSOLETE_ROOTS in vcpu_enter_guest() since the cost of kvm_check_request() is negligible, especially a check that's guarded by kvm_request_pending(). Export symbol of kvm_mmu_free_obsolete_roots() as kvm_mmu_reload() is inline and may be called outside of kvm.ko. Fixes: 6e01b7601dfe ("KVM: x86: Implement kvm_arch_vcpu_pre_fault_memory()") Suggested-by: Sean Christopherson Signed-off-by: Yan Zhao Link: https://lore.kernel.org/r/20250318013333.5817-1-yan.y.zhao@intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu.h | 3 +++ arch/x86/kvm/mmu/mmu.c | 1 + 2 files changed, 4 insertions(+) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 050a0e229a4d..f2b36d32ef40 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -104,6 +104,9 @@ void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) { + if (kvm_check_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu)) + kvm_mmu_free_obsolete_roots(vcpu); + /* * Checking root.hpa is sufficient even when KVM has mirror root. * We can have either: diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 63bb77ee1bb1..387464ebe8e8 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5974,6 +5974,7 @@ void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu) __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.root_mmu); __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu); } +EXPORT_SYMBOL_GPL(kvm_mmu_free_obsolete_roots); static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, int *bytes) From bc43f7114a0e8173968085b21535d57b8030d571 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 28 Apr 2025 13:37:13 +0200 Subject: [PATCH 189/559] drm: adp: Use spin_lock_irqsave for drm device event_lock The lock is used in the interrupt handler so use spin_lock_irqsave to disable interrupts and avoid deadlocks with the irq handler. Fixes: 332122eba628 ("drm: adp: Add Apple Display Pipe driver") Reviewed-by: Alyssa Rosenzweig Signed-off-by: Janne Grunau Link: https://lore.kernel.org/r/20250428-drm_adp_fixes-v2-1-912e081e55d8@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/adp/adp_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index c98c647f981d..157298a8ff42 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -310,6 +310,7 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state) { u32 frame_num = 1; + unsigned long flags; struct adp_drv_private *adp = crtc_to_adp(crtc); struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc); u64 new_size = ALIGN(new_state->mode.hdisplay * @@ -330,13 +331,13 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, } writel(ADBE_FIFO_SYNC | frame_num, adp->be + ADBE_FIFO); //FIXME: use adbe flush interrupt - spin_lock_irq(&crtc->dev->event_lock); + spin_lock_irqsave(&crtc->dev->event_lock, flags); if (crtc->state->event) { drm_crtc_vblank_get(crtc); adp->event = crtc->state->event; } crtc->state->event = NULL; - spin_unlock_irq(&crtc->dev->event_lock); + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } static const struct drm_crtc_funcs adp_crtc_funcs = { From 7a7d6681d5adde7dc7e648dcc6b9e9be6ca93d5d Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 28 Apr 2025 13:37:14 +0200 Subject: [PATCH 190/559] drm: adp: Handle drm_crtc_vblank_get() errors drm_crtc_vblank_get() may fail when it's called before drm_crtc_vblank_on() on a resetted CRTC. This occurs in drm_crtc_helper_funcs' atomic_flush() calls after drm_atomic_helper_crtc_reset() for example directly after probe. Send the vblank event directly in such cases. Avoids following warning in the subsequent drm_crtc_vblank_put() call from the vblank irq handler as below: adp 228200000.display-pipe: [drm] drm_WARN_ON(atomic_read(&vblank->refcount) == 0) WARNING: CPU: 5 PID: 1206 at drivers/gpu/drm/drm_vblank.c:1247 drm_vblank_put+0x158/0x170 ... Call trace: drm_vblank_put+0x158/0x170 (P) drm_crtc_vblank_put+0x24/0x38 adp_fe_irq+0xd8/0xe8 [adpdrm] __handle_irq_event_percpu+0x94/0x318 handle_irq_event+0x54/0xd0 handle_fasteoi_irq+0xa8/0x240 handle_irq_desc+0x3c/0x68 generic_handle_domain_irq+0x24/0x40 Modifying `crtc->state->event` here is fine as crtc->mutex is locked by the non-async atomic commit. In retrospect this looks so obvious that it doesn't warrant a comment in the file. Signed-off-by: Janne Grunau Reviewed-by: Alyssa Rosenzweig Link: https://lore.kernel.org/r/20250428-drm_adp_fixes-v2-2-912e081e55d8@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/adp/adp_drv.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index 157298a8ff42..bdf27ee742ea 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -331,13 +331,19 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, } writel(ADBE_FIFO_SYNC | frame_num, adp->be + ADBE_FIFO); //FIXME: use adbe flush interrupt - spin_lock_irqsave(&crtc->dev->event_lock, flags); if (crtc->state->event) { - drm_crtc_vblank_get(crtc); - adp->event = crtc->state->event; + struct drm_pending_vblank_event *event = crtc->state->event; + + crtc->state->event = NULL; + spin_lock_irqsave(&crtc->dev->event_lock, flags); + + if (drm_crtc_vblank_get(crtc) != 0) + drm_crtc_send_vblank_event(crtc, event); + else + adp->event = event; + + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } - crtc->state->event = NULL; - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } static const struct drm_crtc_funcs adp_crtc_funcs = { From c082a52125d9007b488d590c412fd126aa78c345 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 28 Apr 2025 13:37:15 +0200 Subject: [PATCH 191/559] drm: adp: Enable vblank interrupts in crtc's .atomic_enable Calling drm_crtc_vblank_on() drm_crtc_helper_funcs' atomic_enable is expected to enable vblank interrupts. It may have been avoided here to due to drm_crtc_vblank_get()'s error behavior after drm_crtc_vblank_reset(). With that fixed in the preceding change the driver can call drm_crtc_vblank_on() from adp_crtc_atomic_enable(). Reviewed-by: Alyssa Rosenzweig Signed-off-by: Janne Grunau Link: https://lore.kernel.org/r/20250428-drm_adp_fixes-v2-3-912e081e55d8@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/adp/adp_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index bdf27ee742ea..50d26c73301c 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -288,6 +288,7 @@ static void adp_crtc_atomic_enable(struct drm_crtc *crtc, writel(BIT(0), adp->be + ADBE_BLEND_EN3); writel(BIT(0), adp->be + ADBE_BLEND_BYPASS); writel(BIT(0), adp->be + ADBE_BLEND_EN4); + drm_crtc_vblank_on(crtc); } static void adp_crtc_atomic_disable(struct drm_crtc *crtc, @@ -519,8 +520,7 @@ static int adp_drm_bind(struct device *dev) struct adp_drv_private *adp = to_adp(drm); int err; - adp_disable_vblank(adp); - writel(ADP_CTRL_FIFO_ON | ADP_CTRL_VBLANK_ON, adp->fe + ADP_CTRL); + writel(ADP_CTRL_FIFO_ON, adp->fe + ADP_CTRL); adp->next_bridge = drmm_of_get_bridge(&adp->drm, dev->of_node, 0, 0); if (IS_ERR(adp->next_bridge)) { From 8f6dfc4d7037e88cc0a4be4f290829946999341f Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 28 Apr 2025 13:37:16 +0200 Subject: [PATCH 192/559] drm: adp: Remove pointless irq_lock spin lock Interrupt handlers run with interrupts disabled so it is not necessary to protect them against reentrancy. Reviewed-by: Alyssa Rosenzweig Signed-off-by: Janne Grunau Link: https://lore.kernel.org/r/20250428-drm_adp_fixes-v2-4-912e081e55d8@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/adp/adp_drv.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index 50d26c73301c..54cde090c3f4 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -121,7 +121,6 @@ struct adp_drv_private { dma_addr_t mask_iova; int be_irq; int fe_irq; - spinlock_t irq_lock; struct drm_pending_vblank_event *event; }; @@ -490,8 +489,6 @@ static irqreturn_t adp_fe_irq(int irq, void *arg) u32 int_status; u32 int_ctl; - spin_lock(&adp->irq_lock); - int_status = readl(adp->fe + ADP_INT_STATUS); if (int_status & ADP_INT_STATUS_VBLANK) { drm_crtc_handle_vblank(&adp->crtc); @@ -509,7 +506,6 @@ static irqreturn_t adp_fe_irq(int irq, void *arg) writel(int_status, adp->fe + ADP_INT_STATUS); - spin_unlock(&adp->irq_lock); return IRQ_HANDLED; } @@ -574,8 +570,6 @@ static int adp_probe(struct platform_device *pdev) if (IS_ERR(adp)) return PTR_ERR(adp); - spin_lock_init(&adp->irq_lock); - dev_set_drvdata(&pdev->dev, &adp->drm); err = adp_parse_of(pdev, adp); From 32dce6b1949a696dc7abddc04de8cbe35c260217 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Tue, 4 Mar 2025 20:12:14 +0100 Subject: [PATCH 193/559] drm: Select DRM_KMS_HELPER from DRM_DEBUG_DP_MST_TOPOLOGY_REFS Using "depends on" and "select" for the same Kconfig symbol is known to cause circular dependencies (cmp. "Kconfig recursive dependency limitations" in Documentation/kbuild/kconfig-language.rst. DRM drivers are selecting drm helpers so do the same for DRM_DEBUG_DP_MST_TOPOLOGY_REFS. Fixes following circular dependency reported on x86 for the downstream Asahi Linux tree: error: recursive dependency detected! symbol DRM_KMS_HELPER is selected by DRM_GEM_SHMEM_HELPER symbol DRM_GEM_SHMEM_HELPER is selected by RUST_DRM_GEM_SHMEM_HELPER symbol RUST_DRM_GEM_SHMEM_HELPER is selected by DRM_ASAHI symbol DRM_ASAHI depends on RUST symbol RUST depends on CALL_PADDING symbol CALL_PADDING depends on OBJTOOL symbol OBJTOOL is selected by STACK_VALIDATION symbol STACK_VALIDATION depends on UNWINDER_FRAME_POINTER symbol UNWINDER_FRAME_POINTER is part of choice block at arch/x86/Kconfig.debug:224 symbol unknown is visible depending on UNWINDER_GUESS symbol UNWINDER_GUESS prompt is visible depending on STACKDEPOT symbol STACKDEPOT is selected by DRM_DEBUG_DP_MST_TOPOLOGY_REFS symbol DRM_DEBUG_DP_MST_TOPOLOGY_REFS depends on DRM_KMS_HELPER Fixes: 12a280c72868 ("drm/dp_mst: Add topology ref history tracking for debugging") Cc: stable@vger.kernel.org Signed-off-by: Janne Grunau Acked-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250304-drm_debug_dp_mst_topo_kconfig-v1-1-e16fd152f258@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 2cba2b6ebe1c..f01925ed8176 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -188,7 +188,7 @@ config DRM_DEBUG_DP_MST_TOPOLOGY_REFS bool "Enable refcount backtrace history in the DP MST helpers" depends on STACKTRACE_SUPPORT select STACKDEPOT - depends on DRM_KMS_HELPER + select DRM_KMS_HELPER depends on DEBUG_KERNEL depends on EXPERT help From 9a046c1d21f0ae14c73b5e106e5a501dd902b6a9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 9 Apr 2025 14:22:55 +0200 Subject: [PATCH 194/559] Input: stmpe-ts - use module alias instead of device table When compile tested with W=1 on x86_64 with driver as built-in: stmpe-ts.c:371:34: error: unused variable 'stmpe_ts_ids' [-Werror,-Wunused-const-variable] Ideally this would be referenced from the platform_driver, but since the compatible string is already matched by the mfd driver for its parent device, that would break probing. In this case, the of_device_id table just serves as a module alias for loading the driver, while the device itself is probed using the platform device name. Remove the table and instead use a module alias that reflects how the driver is actually probed. Link: https://lore.kernel.org/lkml/20240403080702.3509288-8-arnd@kernel.org/ Link: https://lore.kernel.org/lkml/181dbdb8-c050-4966-8cb4-2f39495ff3f9@app.fastmail.com/ Signed-off-by: Arnd Bergmann Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250409122314.2848028-3-arnd@kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/stmpe-ts.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/input/touchscreen/stmpe-ts.c b/drivers/input/touchscreen/stmpe-ts.c index a94a1997f96b..af0fb38bcfdc 100644 --- a/drivers/input/touchscreen/stmpe-ts.c +++ b/drivers/input/touchscreen/stmpe-ts.c @@ -366,12 +366,7 @@ static struct platform_driver stmpe_ts_driver = { }; module_platform_driver(stmpe_ts_driver); -static const struct of_device_id stmpe_ts_ids[] = { - { .compatible = "st,stmpe-ts", }, - { }, -}; -MODULE_DEVICE_TABLE(of, stmpe_ts_ids); - +MODULE_ALIAS("platform:stmpe-ts"); MODULE_AUTHOR("Luotao Fu "); MODULE_DESCRIPTION("STMPEXXX touchscreen driver"); MODULE_LICENSE("GPL"); From b8ac485a179d8819ae291afbf13a2fd89d76d4f3 Mon Sep 17 00:00:00 2001 From: Mattijs Korpershoek Date: Mon, 28 Apr 2025 10:35:13 +0200 Subject: [PATCH 195/559] dt-bindings: mediatek,mt6779-keypad: Update Mattijs' email address Update Mattijs Korpershoek's email address to @kernel.org. Signed-off-by: Mattijs Korpershoek Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250428-keypad-email-v1-1-dde6ac76725b@kernel.org Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/mediatek,mt6779-keypad.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml b/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml index 517a4ac1bea3..e365413732e7 100644 --- a/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml +++ b/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Mediatek's Keypad Controller maintainers: - - Mattijs Korpershoek + - Mattijs Korpershoek allOf: - $ref: /schemas/input/matrix-keymap.yaml# From 6a10a2f1e0502c1f23e3095291c985d9bd8c8488 Mon Sep 17 00:00:00 2001 From: Mattijs Korpershoek Date: Mon, 28 Apr 2025 10:40:17 +0200 Subject: [PATCH 196/559] MAINTAINERS: .mailmap: update Mattijs Korpershoek's email address Update Mattijs Korpershoek's email address to @kernel.org. Signed-off-by: Mattijs Korpershoek Link: https://lore.kernel.org/r/20250428-keypad-maintainers-v1-1-4e9c4afba415@kernel.org Signed-off-by: Dmitry Torokhov --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index a897c16d3bae..86e8c8e55ed9 100644 --- a/.mailmap +++ b/.mailmap @@ -472,6 +472,7 @@ Matthias Fuchs Matthieu Baerts Matthieu CASTET Matti Vaittinen +Mattijs Korpershoek Matt Ranostay Matt Ranostay Matt Ranostay Matthew Ranostay diff --git a/MAINTAINERS b/MAINTAINERS index 82eb3de2d76c..765219b393db 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14744,7 +14744,7 @@ F: Documentation/devicetree/bindings/media/mediatek-jpeg-*.yaml F: drivers/media/platform/mediatek/jpeg/ MEDIATEK KEYPAD DRIVER -M: Mattijs Korpershoek +M: Mattijs Korpershoek S: Supported F: Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml F: drivers/input/keyboard/mt6779-keypad.c From 7675b5efd81fe6d524e29d5a541f43201e98afa8 Mon Sep 17 00:00:00 2001 From: Mikael Gonella-Bolduc Date: Wed, 23 Apr 2025 09:52:43 -0400 Subject: [PATCH 197/559] Input: cyttsp5 - fix power control issue on wakeup The power control function ignores the "on" argument when setting the report ID, and thus is always sending HID_POWER_SLEEP. This causes a problem when trying to wakeup. Fix by sending the state variable, which contains the proper HID_POWER_ON or HID_POWER_SLEEP based on the "on" argument. Fixes: 3c98b8dbdced ("Input: cyttsp5 - implement proper sleep and wakeup procedures") Cc: stable@vger.kernel.org Signed-off-by: Mikael Gonella-Bolduc Signed-off-by: Hugo Villeneuve Reviewed-by: Alistair Francis Link: https://lore.kernel.org/r/20250423135243.1261460-1-hugo@hugovil.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/cyttsp5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/cyttsp5.c b/drivers/input/touchscreen/cyttsp5.c index 14c43f0a6c21..071b7c9bf566 100644 --- a/drivers/input/touchscreen/cyttsp5.c +++ b/drivers/input/touchscreen/cyttsp5.c @@ -580,7 +580,7 @@ static int cyttsp5_power_control(struct cyttsp5 *ts, bool on) int rc; SET_CMD_REPORT_TYPE(cmd[0], 0); - SET_CMD_REPORT_ID(cmd[0], HID_POWER_SLEEP); + SET_CMD_REPORT_ID(cmd[0], state); SET_CMD_OPCODE(cmd[1], HID_CMD_SET_POWER); rc = cyttsp5_write(ts, HID_COMMAND_REG, cmd, sizeof(cmd)); From 22cd66a5db56a07d9e621367cb4d16ff0f6baf56 Mon Sep 17 00:00:00 2001 From: Lode Willems Date: Tue, 22 Apr 2025 13:24:27 +0200 Subject: [PATCH 198/559] Input: xpad - add support for 8BitDo Ultimate 2 Wireless Controller This patch adds support for the 8BitDo Ultimate 2 Wireless Controller. Tested using the wireless dongle and plugged in. Signed-off-by: Lode Willems Link: https://lore.kernel.org/r/20250422112457.6728-1-me@lodewillems.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index c33e6f33265b..c933e47173bd 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -387,6 +387,7 @@ static const struct xpad_device { { 0x2dc8, 0x3106, "8BitDo Ultimate Wireless / Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x3109, "8BitDo Ultimate Wireless Bluetooth", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310a, "8BitDo Ultimate 2C Wireless Controller", 0, XTYPE_XBOX360 }, + { 0x2dc8, 0x310b, "8BitDo Ultimate 2 Wireless Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x2e24, 0x1688, "Hyperkin X91 X-Box One pad", 0, XTYPE_XBOXONE }, From a2f546330ef9f3471ab9dd5f59e9685733b6c0dc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 20:07:24 -0400 Subject: [PATCH 199/559] bcachefs: Fix losing return code in next_fiemap_extent() Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 0f1d61aab90b..72b722d80813 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1464,8 +1464,8 @@ static int bch2_next_fiemap_extent(struct btree_trans *trans, unsigned sectors = cur->kbuf.k->k.size; s64 offset_into_extent = 0; enum btree_id data_btree = BTREE_ID_extents; - int ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, - &cur->kbuf); + ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, + &cur->kbuf); if (ret) goto err; From c83311c5b90d12ea22e847fd9390e2fdb6a34f68 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 11:38:58 -0400 Subject: [PATCH 200/559] bcachefs: Use generic_set_sb_d_ops for standard casefolding d_ops Suggested-by: Al Viro Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 11 ++++++++--- fs/bcachefs/namei.c | 3 +++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 72b722d80813..113db85b6ef9 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -66,6 +66,8 @@ static inline void bch2_inode_flags_to_vfs(struct bch_fs *c, struct bch_inode_in if (bch2_inode_casefold(c, &inode->ei_inode)) inode->v.i_flags |= S_CASEFOLD; + else + inode->v.i_flags &= ~S_CASEFOLD; } void bch2_inode_update_after_write(struct btree_trans *trans, @@ -848,10 +850,8 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, set_nlink(&inode->v, 0); } - if (IS_CASEFOLDED(vdir)) { + if (IS_CASEFOLDED(vdir)) d_invalidate(dentry); - d_prune_aliases(&inode->v); - } err: bch2_trans_put(trans); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); @@ -2571,6 +2571,11 @@ got_sb: if (ret) goto err_put_super; +#ifdef CONFIG_UNICODE + sb->s_encoding = c->cf_encoding; +#endif + generic_set_sb_d_ops(sb); + vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); ret = PTR_ERR_OR_ZERO(vinode); bch_err_msg(c, ret, "mounting: error getting root inode"); diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 46f3c8b100a9..52c58c6d53d2 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -343,6 +343,9 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, bool ret = false; for (id = 0; id < Inode_opt_nr; id++) { + if (!S_ISDIR(dst_u->bi_mode) && id == Inode_opt_casefold) + continue; + /* Skip attributes that were explicitly set on this inode */ if (dst_u->bi_fields_set & (1 << id)) continue; From 70c3d89f49523933365d91010f88206855bc1990 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 12:09:33 -0400 Subject: [PATCH 201/559] bcachefs: Emit unicode version message on startup fstests expects this Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index e4ab0595c0ae..32fccca350ed 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -823,25 +823,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; -#ifdef CONFIG_UNICODE - /* Default encoding until we can potentially have more as an option. */ - c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); - if (IS_ERR(c->cf_encoding)) { - printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", - unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); - ret = -EINVAL; - goto err; - } -#else - if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { - printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); - ret = -EINVAL; - goto err; - } -#endif - pr_uuid(&name, c->sb.user_uuid.b); ret = name.allocation_failure ? -BCH_ERR_ENOMEM_fs_name_alloc : 0; if (ret) @@ -941,6 +922,29 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; +#ifdef CONFIG_UNICODE + /* Default encoding until we can potentially have more as an option. */ + c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); + if (IS_ERR(c->cf_encoding)) { + printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); + ret = -EINVAL; + goto err; + } + bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); +#else + if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { + printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); + ret = -EINVAL; + goto err; + } +#endif + for (i = 0; i < c->sb.nr_devices; i++) { if (!bch2_member_exists(c->disk_sb.sb, i)) continue; From bdc32a10a29c3993b3c6c38b21951b66bea525d7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 12:19:47 -0400 Subject: [PATCH 202/559] bcachefs: Add missing utf8_unload() Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 32fccca350ed..27943082c093 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -531,6 +531,10 @@ static void __bch2_fs_free(struct bch_fs *c) for (unsigned i = 0; i < BCH_TIME_STAT_NR; i++) bch2_time_stats_exit(&c->times[i]); +#ifdef CONFIG_UNICODE + utf8_unload(c->cf_encoding); +#endif + bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); bch2_free_fsck_errs(c); From 3c24020119a5f55ec902b5fdc24d8666d76340b3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 11:05:32 -0400 Subject: [PATCH 203/559] bcachefs: Run BCH_RECOVERY_PASS_reconstruct_snapshots on missing subvol -> snapshot Fix this repair path. Signed-off-by: Kent Overstreet --- fs/bcachefs/subvolume.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 5537283d0bea..d0209f7658bb 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -6,6 +6,7 @@ #include "errcode.h" #include "error.h" #include "fs.h" +#include "recovery_passes.h" #include "snapshot.h" #include "subvolume.h" @@ -44,8 +45,8 @@ static int check_subvol(struct btree_trans *trans, ret = bch2_snapshot_lookup(trans, snapid, &snapshot); if (bch2_err_matches(ret, ENOENT)) - bch_err(c, "subvolume %llu points to nonexistent snapshot %u", - k.k->p.offset, snapid); + return bch2_run_explicit_recovery_pass(c, + BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; if (ret) return ret; From 9e9c28acfdc78292100fdd0e46587bf43a174451 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 11:05:32 -0400 Subject: [PATCH 204/559] bcachefs: Add upgrade table entry from 0.14 There are a few errors that needed to be marked as autofix. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-downgrade.c | 4 ++++ fs/bcachefs/sb-errors_format.h | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index acb5d845841e..badd0e17ada5 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -20,6 +20,10 @@ * x(version, recovery_passes, errors...) */ #define UPGRADE_TABLE() \ + x(snapshot_2, \ + RECOVERY_PASS_ALL_FSCK, \ + BCH_FSCK_ERR_subvol_root_wrong_bi_subvol, \ + BCH_FSCK_ERR_subvol_not_master_and_not_snapshot) \ x(backpointers, \ RECOVERY_PASS_ALL_FSCK) \ x(inode_v3, \ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index dc53d25c7cbb..3711aa5009ac 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -205,9 +205,9 @@ enum bch_fsck_flags { x(snapshot_bad_depth, 184, 0) \ x(snapshot_bad_skiplist, 185, 0) \ x(subvol_pos_bad, 186, 0) \ - x(subvol_not_master_and_not_snapshot, 187, 0) \ + x(subvol_not_master_and_not_snapshot, 187, FSCK_AUTOFIX) \ x(subvol_to_missing_root, 188, 0) \ - x(subvol_root_wrong_bi_subvol, 189, 0) \ + x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \ x(bkey_in_missing_snapshot, 190, 0) \ x(inode_pos_inode_nonzero, 191, 0) \ x(inode_pos_blockdev_range, 192, 0) \ From 002466446abae31a15e8b89adb54ee08653eccd1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 12:09:53 -0400 Subject: [PATCH 205/559] bcachefs: fix bch2_dev_buckets_resize() The resize memcpy path was totally busted. Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 4ef261e8db4f..e1efae43982a 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1307,13 +1307,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); if (resize) { - bucket_gens->nbuckets = min(bucket_gens->nbuckets, - old_bucket_gens->nbuckets); - bucket_gens->nbuckets_minus_first = - bucket_gens->nbuckets - bucket_gens->first_bucket; + u64 copy = min(bucket_gens->nbuckets, + old_bucket_gens->nbuckets); memcpy(bucket_gens->b, old_bucket_gens->b, - bucket_gens->nbuckets); + sizeof(bucket_gens->b[0]) * copy); } rcu_assign_pointer(ca->bucket_gens, bucket_gens); From e7f1a52849a01c63c796a9dfe6697d05fff23324 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 12:01:51 -0400 Subject: [PATCH 206/559] bcachefs: Improve bch2_dev_bucket_missing() More useful error message. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.c | 6 ++++-- fs/bcachefs/sb-members.h | 13 ++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 116131f95815..72779912939b 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -15,9 +15,11 @@ void bch2_dev_missing(struct bch_fs *c, unsigned dev) bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); } -void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket) +void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket) { - bch2_fs_inconsistent(c, "pointer to nonexistent bucket %llu:%llu", bucket.inode, bucket.offset); + bch2_fs_inconsistent(ca->fs, + "pointer to nonexistent bucket %llu on device %s (valid range %u-%llu)", + bucket, ca->name, ca->mi.first_bucket, ca->mi.nbuckets); } #define x(t, n, ...) [n] = #t, diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 06bb41a3f360..42786657522c 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -249,20 +249,23 @@ static inline struct bch_dev *bch2_dev_tryget(struct bch_fs *c, unsigned dev) static inline struct bch_dev *bch2_dev_bucket_tryget_noerror(struct bch_fs *c, struct bpos bucket) { struct bch_dev *ca = bch2_dev_tryget_noerror(c, bucket.inode); - if (ca && !bucket_valid(ca, bucket.offset)) { + if (ca && unlikely(!bucket_valid(ca, bucket.offset))) { bch2_dev_put(ca); ca = NULL; } return ca; } -void bch2_dev_bucket_missing(struct bch_fs *, struct bpos); +void bch2_dev_bucket_missing(struct bch_dev *, u64); static inline struct bch_dev *bch2_dev_bucket_tryget(struct bch_fs *c, struct bpos bucket) { - struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, bucket); - if (!ca) - bch2_dev_bucket_missing(c, bucket); + struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode); + if (ca && unlikely(!bucket_valid(ca, bucket.offset))) { + bch2_dev_bucket_missing(ca, bucket.offset); + bch2_dev_put(ca); + ca = NULL; + } return ca; } From eca5b56ccfdf583a8781503646fb39554f8624bd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 12:11:31 -0400 Subject: [PATCH 207/559] bcachefs: Don't generate alloc updates to invalid buckets Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index e1efae43982a..31fbc2716d8b 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -604,6 +604,13 @@ static int bch2_trigger_pointer(struct btree_trans *trans, } struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); + if (!bucket_valid(ca, bucket.offset)) { + if (insert) { + bch2_dev_bucket_missing(ca, bucket.offset); + ret = -BCH_ERR_trigger_pointer; + } + goto err; + } if (flags & BTREE_TRIGGER_transactional) { struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); From c366b1672d74cb008974f6e36e34dc191621f3bb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 09:31:23 -0400 Subject: [PATCH 208/559] bcachefs: btree_node_data_missing is now autofix Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 3711aa5009ac..a4ad5924107b 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -46,7 +46,7 @@ enum bch_fsck_flags { x(btree_node_unsupported_version, 34, 0) \ x(btree_node_bset_older_than_sb_min, 35, 0) \ x(btree_node_bset_newer_than_sb, 36, 0) \ - x(btree_node_data_missing, 37, 0) \ + x(btree_node_data_missing, 37, FSCK_AUTOFIX) \ x(btree_node_bset_after_end, 38, 0) \ x(btree_node_replicas_sectors_written_mismatch, 39, 0) \ x(btree_node_replicas_data_mismatch, 40, 0) \ From f04dd30f1bef1ed2e74a4050af6e5e5e3869bac3 Mon Sep 17 00:00:00 2001 From: Vishal Badole Date: Thu, 24 Apr 2025 18:32:48 +0530 Subject: [PATCH 209/559] amd-xgbe: Fix to ensure dependent features are toggled with RX checksum offload According to the XGMAC specification, enabling features such as Layer 3 and Layer 4 Packet Filtering, Split Header and Virtualized Network support automatically selects the IPC Full Checksum Offload Engine on the receive side. When RX checksum offload is disabled, these dependent features must also be disabled to prevent abnormal behavior caused by mismatched feature dependencies. Ensure that toggling RX checksum offload (disabling or enabling) properly disables or enables all dependent features, maintaining consistent and expected behavior in the network device. Cc: stable@vger.kernel.org Fixes: 1a510ccf5869 ("amd-xgbe: Add support for VXLAN offload capabilities") Signed-off-by: Vishal Badole Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250424130248.428865-1-Vishal.Badole@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-desc.c | 9 +++++++-- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 24 +++++++++++++++++++++-- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 11 +++++++++-- drivers/net/ethernet/amd/xgbe/xgbe.h | 4 ++++ 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index 230726d7b74f..d41b58fad37b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -373,8 +373,13 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata, } /* Set up the header page info */ - xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, - XGBE_SKB_ALLOC_SIZE); + if (pdata->netdev->features & NETIF_F_RXCSUM) { + xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, + XGBE_SKB_ALLOC_SIZE); + } else { + xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, + pdata->rx_buf_size); + } /* Set up the buffer page info */ xgbe_set_buffer_data(&rdata->rx.buf, &ring->rx_buf_pa, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index f393228d41c7..f1b0fb02b3cd 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -320,6 +320,18 @@ static void xgbe_config_sph_mode(struct xgbe_prv_data *pdata) XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE); } +static void xgbe_disable_sph_mode(struct xgbe_prv_data *pdata) +{ + unsigned int i; + + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->rx_ring) + break; + + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 0); + } +} + static int xgbe_write_rss_reg(struct xgbe_prv_data *pdata, unsigned int type, unsigned int index, unsigned int val) { @@ -3545,8 +3557,12 @@ static int xgbe_init(struct xgbe_prv_data *pdata) xgbe_config_tx_coalesce(pdata); xgbe_config_rx_buffer_size(pdata); xgbe_config_tso_mode(pdata); - xgbe_config_sph_mode(pdata); - xgbe_config_rss(pdata); + + if (pdata->netdev->features & NETIF_F_RXCSUM) { + xgbe_config_sph_mode(pdata); + xgbe_config_rss(pdata); + } + desc_if->wrapper_tx_desc_init(pdata); desc_if->wrapper_rx_desc_init(pdata); xgbe_enable_dma_interrupts(pdata); @@ -3702,5 +3718,9 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) hw_if->disable_vxlan = xgbe_disable_vxlan; hw_if->set_vxlan_id = xgbe_set_vxlan_id; + /* For Split Header*/ + hw_if->enable_sph = xgbe_config_sph_mode; + hw_if->disable_sph = xgbe_disable_sph_mode; + DBGPR("<--xgbe_init_function_ptrs\n"); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index d84a310dfcd4..8e09ad8fa022 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2257,10 +2257,17 @@ static int xgbe_set_features(struct net_device *netdev, if (ret) return ret; - if ((features & NETIF_F_RXCSUM) && !rxcsum) + if ((features & NETIF_F_RXCSUM) && !rxcsum) { + hw_if->enable_sph(pdata); + hw_if->enable_vxlan(pdata); hw_if->enable_rx_csum(pdata); - else if (!(features & NETIF_F_RXCSUM) && rxcsum) + schedule_work(&pdata->restart_work); + } else if (!(features & NETIF_F_RXCSUM) && rxcsum) { + hw_if->disable_sph(pdata); + hw_if->disable_vxlan(pdata); hw_if->disable_rx_csum(pdata); + schedule_work(&pdata->restart_work); + } if ((features & NETIF_F_HW_VLAN_CTAG_RX) && !rxvlan) hw_if->enable_rx_vlan_stripping(pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index d85386cac8d1..ed5d43c16d0e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -865,6 +865,10 @@ struct xgbe_hw_if { void (*enable_vxlan)(struct xgbe_prv_data *); void (*disable_vxlan)(struct xgbe_prv_data *); void (*set_vxlan_id)(struct xgbe_prv_data *); + + /* For Split Header */ + void (*enable_sph)(struct xgbe_prv_data *pdata); + void (*disable_sph)(struct xgbe_prv_data *pdata); }; /* This structure represents implementation specific routines for an From 8c47d5753a119f1c986bc3ed92e9178d2624e1e8 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Fri, 25 Apr 2025 05:29:53 +0100 Subject: [PATCH 210/559] net: ethernet: mtk_eth_soc: sync mtk_clks_source_name array When removing the clock bits for clocks which aren't used by the Ethernet driver their names should also have been removed from the mtk_clks_source_name array. Remove them now as enum mtk_clks_map needs to match the mtk_clks_source_name array so the driver can make sure that all required clocks are present and correctly name missing clocks. Fixes: 887b1d1adb2e ("net: ethernet: mtk_eth_soc: drop clocks unused by Ethernet driver") Signed-off-by: Daniel Golle Reviewed-by: Simon Horman Link: https://patch.msgid.link/d075e706ff1cebc07f9ec666736d0b32782fd487.1745555321.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 47807b202310..83068925c589 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -269,12 +269,8 @@ static const char * const mtk_clks_source_name[] = { "ethwarp_wocpu2", "ethwarp_wocpu1", "ethwarp_wocpu0", - "top_usxgmii0_sel", - "top_usxgmii1_sel", "top_sgm0_sel", "top_sgm1_sel", - "top_xfi_phy0_xtal_sel", - "top_xfi_phy1_xtal_sel", "top_eth_gmii_sel", "top_eth_refck_50m_sel", "top_eth_sys_200m_sel", From 10c34b7d71a4ff8c06d926f1846edf8295ed75bf Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Fri, 25 Apr 2025 19:14:18 +0200 Subject: [PATCH 211/559] netlink: specs: ethtool: Remove UAPI duplication of phy-upstream enum The phy-upstream enum is already defined in the ethtool.h UAPI header and used by the ethtool userspace tool. However, the ethtool spec does not reference it, causing YNL to auto-generate a duplicate and redundant enum. Fix this by updating the spec to reference the existing UAPI enum in ethtool.h. Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20250425171419.947352-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 4 +++- include/uapi/linux/ethtool_netlink_generated.h | 5 ----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 655d8d10fe24..c650cd3dcb80 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -89,8 +89,10 @@ definitions: doc: Group of short_detected states - name: phy-upstream-type - enum-name: + enum-name: phy-upstream + header: linux/ethtool.h type: enum + name-prefix: phy-upstream entries: [ mac, phy ] - name: tcp-data-split diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index fe24c3459ac0..30c8dad6214e 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -31,11 +31,6 @@ enum ethtool_header_flags { ETHTOOL_FLAG_STATS = 4, }; -enum { - ETHTOOL_PHY_UPSTREAM_TYPE_MAC, - ETHTOOL_PHY_UPSTREAM_TYPE_PHY, -}; - enum ethtool_tcp_data_split { ETHTOOL_TCP_DATA_SPLIT_UNKNOWN, ETHTOOL_TCP_DATA_SPLIT_DISABLED, From dfd76010f8e821b66116dec3c7d90dd2403d1396 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 25 Apr 2025 13:38:57 -0700 Subject: [PATCH 212/559] pds_core: remove write-after-free of client_id A use-after-free error popped up in stress testing: [Mon Apr 21 21:21:33 2025] BUG: KFENCE: use-after-free write in pdsc_auxbus_dev_del+0xef/0x160 [pds_core] [Mon Apr 21 21:21:33 2025] Use-after-free write at 0x000000007013ecd1 (in kfence-#47): [Mon Apr 21 21:21:33 2025] pdsc_auxbus_dev_del+0xef/0x160 [pds_core] [Mon Apr 21 21:21:33 2025] pdsc_remove+0xc0/0x1b0 [pds_core] [Mon Apr 21 21:21:33 2025] pci_device_remove+0x24/0x70 [Mon Apr 21 21:21:33 2025] device_release_driver_internal+0x11f/0x180 [Mon Apr 21 21:21:33 2025] driver_detach+0x45/0x80 [Mon Apr 21 21:21:33 2025] bus_remove_driver+0x83/0xe0 [Mon Apr 21 21:21:33 2025] pci_unregister_driver+0x1a/0x80 The actual device uninit usually happens on a separate thread scheduled after this code runs, but there is no guarantee of order of thread execution, so this could be a problem. There's no actual need to clear the client_id at this point, so simply remove the offending code. Fixes: 10659034c622 ("pds_core: add the aux client API") Signed-off-by: Shannon Nelson Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250425203857.71547-1-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/auxbus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index c9aac27883a3..92f359f2b449 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -186,7 +186,6 @@ void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf, pds_client_unregister(pf, padev->client_id); auxiliary_device_delete(&padev->aux_dev); auxiliary_device_uninit(&padev->aux_dev); - padev->client_id = 0; *pd_ptr = NULL; mutex_unlock(&pf->config_lock); From f99a3fbf023e20b626be4b0f042463d598050c9a Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:05 -0300 Subject: [PATCH 213/559] net_sched: drr: Fix double list add in class with netem as child qdisc As described in Gerrard's report [1], there are use cases where a netem child qdisc will make the parent qdisc's enqueue callback reentrant. In the case of drr, there won't be a UAF, but the code will add the same classifier to the list twice, which will cause memory corruption. In addition to checking for qlen being zero, this patch checks whether the class was already added to the active_list (cl_is_active) before adding to the list to cover for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-2-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_drr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index e0a81d313aa7..9b6d79bd8737 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -35,6 +35,11 @@ struct drr_sched { struct Qdisc_class_hash clhash; }; +static bool cl_is_active(struct drr_class *cl) +{ + return !list_empty(&cl->alist); +} + static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) { struct drr_sched *q = qdisc_priv(sch); @@ -337,7 +342,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; int err = 0; - bool first; cl = drr_classify(skb, sch, &err); if (cl == NULL) { @@ -347,7 +351,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -357,7 +360,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - if (first) { + if (!cl_is_active(cl)) { list_add_tail(&cl->alist, &q->active); cl->deficit = cl->quantum; } From 141d34391abbb315d68556b7c67ad97885407547 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:06 -0300 Subject: [PATCH 214/559] net_sched: hfsc: Fix a UAF vulnerability in class with netem as child qdisc As described in Gerrard's report [1], we have a UAF case when an hfsc class has a netem child qdisc. The crux of the issue is that hfsc is assuming that checking for cl->qdisc->q.qlen == 0 guarantees that it hasn't inserted the class in the vttree or eltree (which is not true for the netem duplicate case). This patch checks the n_active class variable to make sure that the code won't insert the class in the vttree or eltree twice, catering for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Reported-by: Gerrard Tai Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-3-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_hfsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 6c8ef826cec0..cb8c525ea20e 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1569,7 +1569,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } - if (first) { + if (first && !cl->cl_nactive) { if (cl->cl_flags & HFSC_RSC) init_ed(cl, len); if (cl->cl_flags & HFSC_FSC) From 1a6d0c00fa07972384b0c308c72db091d49988b6 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:07 -0300 Subject: [PATCH 215/559] net_sched: ets: Fix double list add in class with netem as child qdisc As described in Gerrard's report [1], there are use cases where a netem child qdisc will make the parent qdisc's enqueue callback reentrant. In the case of ets, there won't be a UAF, but the code will add the same classifier to the list twice, which will cause memory corruption. In addition to checking for qlen being zero, this patch checks whether the class was already added to the active_list (cl_is_active) before doing the addition to cater for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-4-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_ets.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index c3bdeb14185b..2c069f0181c6 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -74,6 +74,11 @@ static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = { [TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 }, }; +static bool cl_is_active(struct ets_class *cl) +{ + return !list_empty(&cl->alist); +} + static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr, unsigned int *quantum, struct netlink_ext_ack *extack) @@ -416,7 +421,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct ets_sched *q = qdisc_priv(sch); struct ets_class *cl; int err = 0; - bool first; cl = ets_classify(skb, sch, &err); if (!cl) { @@ -426,7 +430,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -436,7 +439,7 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - if (first && !ets_class_is_strict(q, cl)) { + if (!cl_is_active(cl) && !ets_class_is_strict(q, cl)) { list_add_tail(&cl->alist, &q->active); cl->deficit = cl->quantum; } From f139f37dcdf34b67f5bf92bc8e0f7f6b3ac63aa4 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:08 -0300 Subject: [PATCH 216/559] net_sched: qfq: Fix double list add in class with netem as child qdisc As described in Gerrard's report [1], there are use cases where a netem child qdisc will make the parent qdisc's enqueue callback reentrant. In the case of qfq, there won't be a UAF, but the code will add the same classifier to the list twice, which will cause memory corruption. This patch checks whether the class was already added to the agg->active list (cl_is_active) before doing the addition to cater for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-5-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_qfq.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 687a932eb9b2..bf1282cb22eb 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -202,6 +202,11 @@ struct qfq_sched { */ enum update_reason {enqueue, requeue}; +static bool cl_is_active(struct qfq_class *cl) +{ + return !list_empty(&cl->alist); +} + static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) { struct qfq_sched *q = qdisc_priv(sch); @@ -1215,7 +1220,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct qfq_class *cl; struct qfq_aggregate *agg; int err = 0; - bool first; cl = qfq_classify(skb, sch, &err); if (cl == NULL) { @@ -1237,7 +1241,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); @@ -1253,8 +1256,8 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, ++sch->q.qlen; agg = cl->agg; - /* if the queue was not empty, then done here */ - if (!first) { + /* if the class is active, then done here */ + if (cl_is_active(cl)) { if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) && list_first_entry(&agg->active, struct qfq_class, alist) == cl && cl->deficit < len) From a6e1c5aa16dd5d351603c9d3ae259a069eabdcc2 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:09 -0300 Subject: [PATCH 217/559] selftests: tc-testing: Add TDC tests that exercise reentrant enqueue behaviour Add 5 TDC tests that exercise the reentrant enqueue behaviour in drr, ets, qfq, and hfsc: - Test DRR's enqueue reentrant behaviour with netem (which caused a double list add) - Test ETS's enqueue reentrant behaviour with netem (which caused a double list add) - Test QFQ's enqueue reentrant behaviour with netem (which caused a double list add) - Test HFSC's enqueue reentrant behaviour with netem (which caused a UAF) - Test nested DRR's enqueue reentrant behaviour with netem (which caused a double list add) Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-6-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 186 ++++++++++++++++++ 1 file changed, 186 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index e26bbc169783..0843f6d37e9c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -352,5 +352,191 @@ "$TC qdisc del dev $DUMMY handle 1:0 root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "90ec", + "name": "Test DRR's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "drr", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "1f1f", + "name": "Test ETS's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "ets" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root ets bands 2", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 ets quantum 1500", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s class show dev $DUMMY", + "matchJSON": [ + { + "class": "ets", + "handle": "1:1", + "stats": { + "bytes": 196, + "packets": 2 + } + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "5e6d", + "name": "Test QFQ's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root qfq", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 qfq weight 100 maxpkt 1500", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "qfq", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "bf1d", + "name": "Test HFSC's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root hfsc", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc ls m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1", + "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2", + "ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true", + "$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1", + "$TC class del dev $DUMMY classid 1:1" + ], + "cmdUnderTest": "ping -c 1 10.10.10.2 -I$DUMMY > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "hfsc", + "handle": "1:", + "bytes": 392, + "packets": 4 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "7c3b", + "name": "Test nested DRR's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1", + "$TC qdisc add dev $DUMMY handle 2:0 parent 1:1 drr", + "$TC class add dev $DUMMY classid 2:1 parent 2:0 drr", + "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 2:1", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3:0 netem duplicate 100%" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "drr", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] From 3ffcd7b657c9d96eb3ffe174449b4248dd7fc6a9 Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Fri, 25 Apr 2025 15:26:31 -0700 Subject: [PATCH 218/559] ice: fix Get Tx Topology AQ command error on E830 The Get Tx Topology AQ command (opcode 0x0418) has different read flag requirements depending on the hardware/firmware. For E810, E822, and E823 firmware the read flag must be set, and for newer hardware (E825 and E830) it must not be set. This results in failure to configure Tx topology and the following warning message during probe: DDP package does not support Tx scheduling layers switching feature - please update to the latest DDP package and try again The current implementation only handles E825-C but not E830. It is confusing as we first check ice_is_e825c() and then set the flag in the set case. Finally, we check ice_is_e825c() again and set the flag for all other hardware in both the set and get case. Instead, notice that we always need the read flag for set, but only need the read flag for get on E810, E822, and E823 firmware. Fix the logic to check the MAC type and set the read flag in get only on the older devices which require it. Fixes: ba1124f58afd ("ice: Add E830 device IDs, MAC type and registers") Signed-off-by: Paul Greenwalt Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250425222636.3188441-2-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_ddp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index 69d5b1a28491..59323c019544 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -2345,15 +2345,15 @@ ice_get_set_tx_topo(struct ice_hw *hw, u8 *buf, u16 buf_size, cmd->set_flags |= ICE_AQC_TX_TOPO_FLAGS_SRC_RAM | ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW; - if (hw->mac_type == ICE_MAC_GENERIC_3K_E825) - desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); } else { ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_tx_topo); cmd->get_flags = ICE_AQC_TX_TOPO_GET_RAM; - } - if (hw->mac_type != ICE_MAC_GENERIC_3K_E825) - desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + if (hw->mac_type == ICE_MAC_E810 || + hw->mac_type == ICE_MAC_GENERIC) + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + } status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); if (status) From 425c5f266b2edeee0ce16fedd8466410cdcfcfe3 Mon Sep 17 00:00:00 2001 From: Xuanqiang Luo Date: Fri, 25 Apr 2025 15:26:32 -0700 Subject: [PATCH 219/559] ice: Check VF VSI Pointer Value in ice_vc_add_fdir_fltr() As mentioned in the commit baeb705fd6a7 ("ice: always check VF VSI pointer values"), we need to perform a null pointer check on the return value of ice_get_vf_vsi() before using it. Fixes: 6ebbe97a4881 ("ice: Add a per-VF limit on number of FDIR filters") Signed-off-by: Xuanqiang Luo Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250425222636.3188441-3-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 7752920d7a8e..1cca9b2262e8 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -2097,6 +2097,11 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) pf = vf->pf; dev = ice_pf_to_dev(pf); vf_vsi = ice_get_vf_vsi(vf); + if (!vf_vsi) { + dev_err(dev, "Can not get FDIR vf_vsi for VF %u\n", vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err_exit; + } #define ICE_VF_MAX_FDIR_FILTERS 128 if (!ice_fdir_num_avail_fltr(&pf->hw, vf_vsi) || From 713dd6c2deca88cba0596b1e2576f7b7a8e5c59e Mon Sep 17 00:00:00 2001 From: Madhu Chittim Date: Fri, 25 Apr 2025 15:26:33 -0700 Subject: [PATCH 220/559] idpf: fix offloads support for encapsulated packets Split offloads into csum, tso and other offloads so that tunneled packets do not by default have all the offloads enabled. Stateless offloads for encapsulated packets are not yet supported in firmware/software but in the driver we were setting the features same as non encapsulated features. Fixed naming to clarify CSUM bits are being checked for Tx. Inherit netdev features to VLAN interfaces as well. Fixes: 0fe45467a104 ("idpf: add create vport and netdev configuration") Reviewed-by: Sridhar Samudrala Signed-off-by: Madhu Chittim Tested-by: Zachary Goldstein Tested-by: Samuel Salin Signed-off-by: Tony Nguyen Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250425222636.3188441-4-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf.h | 18 +++---- drivers/net/ethernet/intel/idpf/idpf_lib.c | 57 ++++++++-------------- 2 files changed, 27 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 66544faab710..aef0e9775a33 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -629,13 +629,13 @@ bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all, VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4 |\ VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6) -#define IDPF_CAP_RX_CSUM_L4V4 (\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP) +#define IDPF_CAP_TX_CSUM_L4V4 (\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP |\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP) -#define IDPF_CAP_RX_CSUM_L4V6 (\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP) +#define IDPF_CAP_TX_CSUM_L4V6 (\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP |\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP) #define IDPF_CAP_RX_CSUM (\ VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 |\ @@ -644,11 +644,9 @@ bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all, VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |\ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP) -#define IDPF_CAP_SCTP_CSUM (\ +#define IDPF_CAP_TX_SCTP_CSUM (\ VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP |\ - VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP) + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP) #define IDPF_CAP_TUNNEL_TX_CSUM (\ VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL |\ diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index aa755dedb41d..730a9c7a59f2 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -703,8 +703,10 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; struct idpf_vport_config *vport_config; + netdev_features_t other_offloads = 0; + netdev_features_t csum_offloads = 0; + netdev_features_t tso_offloads = 0; netdev_features_t dflt_features; - netdev_features_t offloads = 0; struct idpf_netdev_priv *np; struct net_device *netdev; u16 idx = vport->idx; @@ -766,53 +768,32 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) if (idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) dflt_features |= NETIF_F_RXHASH; - if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V4)) - dflt_features |= NETIF_F_IP_CSUM; - if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V6)) - dflt_features |= NETIF_F_IPV6_CSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_TX_CSUM_L4V4)) + csum_offloads |= NETIF_F_IP_CSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_TX_CSUM_L4V6)) + csum_offloads |= NETIF_F_IPV6_CSUM; if (idpf_is_cap_ena(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM)) - dflt_features |= NETIF_F_RXCSUM; - if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_SCTP_CSUM)) - dflt_features |= NETIF_F_SCTP_CRC; + csum_offloads |= NETIF_F_RXCSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_TX_SCTP_CSUM)) + csum_offloads |= NETIF_F_SCTP_CRC; if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV4_TCP)) - dflt_features |= NETIF_F_TSO; + tso_offloads |= NETIF_F_TSO; if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV6_TCP)) - dflt_features |= NETIF_F_TSO6; + tso_offloads |= NETIF_F_TSO6; if (idpf_is_cap_ena_all(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV4_UDP | VIRTCHNL2_CAP_SEG_IPV6_UDP)) - dflt_features |= NETIF_F_GSO_UDP_L4; + tso_offloads |= NETIF_F_GSO_UDP_L4; if (idpf_is_cap_ena_all(adapter, IDPF_RSC_CAPS, IDPF_CAP_RSC)) - offloads |= NETIF_F_GRO_HW; - /* advertise to stack only if offloads for encapsulated packets is - * supported - */ - if (idpf_is_cap_ena(vport->adapter, IDPF_SEG_CAPS, - VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL)) { - offloads |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_GRE | - NETIF_F_GSO_GRE_CSUM | - NETIF_F_GSO_PARTIAL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_IPXIP4 | - NETIF_F_GSO_IPXIP6 | - 0; - - if (!idpf_is_cap_ena_all(vport->adapter, IDPF_CSUM_CAPS, - IDPF_CAP_TUNNEL_TX_CSUM)) - netdev->gso_partial_features |= - NETIF_F_GSO_UDP_TUNNEL_CSUM; - - netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; - offloads |= NETIF_F_TSO_MANGLEID; - } + other_offloads |= NETIF_F_GRO_HW; if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_LOOPBACK)) - offloads |= NETIF_F_LOOPBACK; + other_offloads |= NETIF_F_LOOPBACK; - netdev->features |= dflt_features; - netdev->hw_features |= dflt_features | offloads; - netdev->hw_enc_features |= dflt_features | offloads; + netdev->features |= dflt_features | csum_offloads | tso_offloads; + netdev->hw_features |= netdev->features | other_offloads; + netdev->vlan_features |= netdev->features | other_offloads; + netdev->hw_enc_features |= dflt_features | other_offloads; idpf_set_ethtool_ops(netdev); netif_set_affinity_auto(netdev); SET_NETDEV_DEV(netdev, &adapter->pdev->dev); From 9c51f24c1ac7cbde9cc94a54137775dc52aae491 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 22 Apr 2025 18:03:47 +0100 Subject: [PATCH 221/559] scsi: myrb: Fix spelling mistake "statux" -> "status" There is a spelling mistake in a dev_err() message. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20250422170347.66792-1-colin.i.king@gmail.com Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/myrb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index dc4bd422b601..486db5b2f05d 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -891,7 +891,7 @@ static bool myrb_enable_mmio(struct myrb_hba *cb, mbox_mmio_init_t mmio_init_fn) status = mmio_init_fn(pdev, base, &mbox); if (status != MYRB_STATUS_SUCCESS) { dev_err(&pdev->dev, - "Failed to enable mailbox, statux %02X\n", + "Failed to enable mailbox, status %02X\n", status); return false; } From 0e9693b97a0eee1df7bae33aec207c975fbcbdb8 Mon Sep 17 00:00:00 2001 From: Keoseong Park Date: Fri, 25 Apr 2025 10:06:05 +0900 Subject: [PATCH 222/559] scsi: ufs: core: Remove redundant query_complete trace The query_complete trace was not removed after ufshcd_issue_dev_cmd() was called from the bsg path, resulting in duplicate output. Below is an example of the trace: ufs-utils-773 [000] ..... 218.176933: ufshcd_upiu: query_send: 0000:00:04.0: HDR:16 00 00 1f 00 01 00 00 00 00 00 00, OSF:03 07 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ufs-utils-773 [000] ..... 218.177145: ufshcd_upiu: query_complete: 0000:00:04.0: HDR:36 00 00 1f 00 01 00 00 00 00 00 00, OSF:03 07 00 00 00 00 00 00 00 00 00 08 00 00 00 00 ufs-utils-773 [000] ..... 218.177146: ufshcd_upiu: query_complete: 0000:00:04.0: HDR:36 00 00 1f 00 01 00 00 00 00 00 00, OSF:03 07 00 00 00 00 00 00 00 00 00 08 00 00 00 00 Remove the redundant trace call in the bsg path, preventing duplication. Signed-off-by: Keoseong Park Link: https://lore.kernel.org/r/20250425010605epcms2p67e89b351398832fe0fd547404d3afc65@epcms2p6 Fixes: 71aabb747d5f ("scsi: ufs: core: Reuse exec_dev_cmd") Reviewed-by: Avri Altman Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 5cb6132b8147..7735421e3991 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -7265,8 +7265,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, err = -EINVAL; } } - ufshcd_add_query_upiu_trace(hba, err ? UFS_QUERY_ERR : UFS_QUERY_COMP, - (struct utp_upiu_req *)lrbp->ucd_rsp_ptr); return err; } From 652dd6558b8b5a1a04fef129e0231ee493e24951 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 20:12:01 -0400 Subject: [PATCH 223/559] bcachefs: btree_root_unreadable_and_scan_found_nothing autofix for non data btrees If loosing a btree won't cause data loss - i.e. it's an alloc btree, or we can easily reconstruct it - we shouldn't require user action to continue repair. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 7b98ba2dec64..37b69d89341f 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -47,6 +47,27 @@ #define DROP_PREV_NODE 11 #define DID_FILL_FROM_SCAN 12 +/* + * Returns true if it's a btree we can easily reconstruct, or otherwise won't + * cause data loss if it's missing: + */ +static bool btree_id_important(enum btree_id btree) +{ + if (btree_id_is_alloc(btree)) + return false; + + switch (btree) { + case BTREE_ID_quotas: + case BTREE_ID_snapshot_trees: + case BTREE_ID_logged_ops: + case BTREE_ID_rebalance_work: + case BTREE_ID_subvolume_children: + return false; + default: + return true; + } +} + static const char * const bch2_gc_phase_strs[] = { #define x(n) #n, GC_PHASES() @@ -534,8 +555,10 @@ reconstruct_root: r->error = 0; if (!bch2_btree_has_scanned_nodes(c, i)) { - mustfix_fsck_err(trans, btree_root_unreadable_and_scan_found_nothing, - "no nodes found for btree %s, continue?", buf.buf); + __fsck_err(trans, + FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0), + btree_root_unreadable_and_scan_found_nothing, + "no nodes found for btree %s, continue?", buf.buf); bch2_btree_root_alloc_fake_trans(trans, i, 0); } else { bch2_btree_root_alloc_fake_trans(trans, i, 1); From e5a3b8cf3330a774e5f5f06a2b7cf20116447297 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 20:25:15 -0400 Subject: [PATCH 224/559] bcachefs: More informative error message when shutting down due to error Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 925b0b54ea2f..6b8695b1349c 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -478,7 +478,9 @@ int __bch2_fsck_err(struct bch_fs *c, } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) { if (c->opts.errors != BCH_ON_ERROR_continue || !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { - prt_str(out, ", shutting down"); + prt_str_indented(out, ", shutting down\n" + "error not marked as autofix and not in fsck\n" + "run fsck, and forward to devs so error can be marked for self-healing"); inconsistent = true; print = true; ret = -BCH_ERR_fsck_errors_not_fixed; From 9a4a858c9b365d817231f6f3592dc26e9d4191bb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 20:28:58 -0400 Subject: [PATCH 225/559] bcachefs: Use bch2_kvmalloc() for journal keys array We can hit this limit fairly easy when we have to reconstuct large amounts of alloc info on large filesystems. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_journal_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 7d6c971db23c..ade3b5addd75 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -288,7 +288,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, .size = max_t(size_t, keys->size, 8) * 2, }; - new_keys.data = kvmalloc_array(new_keys.size, sizeof(new_keys.data[0]), GFP_KERNEL); + new_keys.data = bch2_kvmalloc(new_keys.size * sizeof(new_keys.data[0]), GFP_KERNEL); if (!new_keys.data) { bch_err(c, "%s: error allocating new key array (size %zu)", __func__, new_keys.size); From dbe4674802ec8e43835900490b3492299464ad27 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 20:38:04 -0400 Subject: [PATCH 226/559] bcachefs: Topology error after insert is now an ERO A user hit this, and this will naturally be easier to debug if we don't panic. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 49 +++++++++++++++++++---------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 44b5fe430370..00307356d7c8 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1389,7 +1389,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, printbuf_exit(&buf); } -static void +static int bch2_btree_insert_keys_interior(struct btree_update *as, struct btree_trans *trans, struct btree_path *path, @@ -1411,7 +1411,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, insert = bkey_next(insert)) bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert); - if (bch2_btree_node_check_topology(trans, b)) { + int ret = bch2_btree_node_check_topology(trans, b); + if (ret) { struct printbuf buf = PRINTBUF; for (struct bkey_i *k = keys->keys; @@ -1421,11 +1422,15 @@ bch2_btree_insert_keys_interior(struct btree_update *as, prt_newline(&buf); } - panic("%s(): check_topology error: inserted keys\n%s", __func__, buf.buf); + bch2_fs_fatal_error(as->c, "%ps -> %s(): check_topology error %s: inserted keys\n%s", + (void *) _RET_IP_, __func__, bch2_err_str(ret), buf.buf); + dump_stack(); + return ret; } memmove_u64s_down(keys->keys, insert, keys->top_p - insert->_data); keys->top_p -= insert->_data - keys->keys_p; + return 0; } static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos) @@ -1559,11 +1564,11 @@ static void __btree_split_node(struct btree_update *as, * nodes that were coalesced, and thus in the middle of a child node post * coalescing: */ -static void btree_split_insert_keys(struct btree_update *as, - struct btree_trans *trans, - btree_path_idx_t path_idx, - struct btree *b, - struct keylist *keys) +static int btree_split_insert_keys(struct btree_update *as, + struct btree_trans *trans, + btree_path_idx_t path_idx, + struct btree *b, + struct keylist *keys) { struct btree_path *path = trans->paths + path_idx; @@ -1573,8 +1578,12 @@ static void btree_split_insert_keys(struct btree_update *as, bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p); - bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); + int ret = bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); + if (ret) + return ret; } + + return 0; } static int btree_split(struct btree_update *as, struct btree_trans *trans, @@ -1607,8 +1616,10 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, __btree_split_node(as, trans, b, n, keys); if (keys) { - btree_split_insert_keys(as, trans, path, n1, keys); - btree_split_insert_keys(as, trans, path, n2, keys); + ret = btree_split_insert_keys(as, trans, path, n1, keys) ?: + btree_split_insert_keys(as, trans, path, n2, keys); + if (ret) + goto err; BUG_ON(!bch2_keylist_empty(keys)); } @@ -1654,7 +1665,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, n3->sib_u64s[0] = U16_MAX; n3->sib_u64s[1] = U16_MAX; - btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); + ret = btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); + if (ret) + goto err; } } else { trace_and_count(c, btree_node_compact, trans, b); @@ -1662,7 +1675,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, n1 = bch2_btree_node_alloc_replacement(as, trans, b); if (keys) { - btree_split_insert_keys(as, trans, path, n1, keys); + ret = btree_split_insert_keys(as, trans, path, n1, keys); + if (ret) + goto err; BUG_ON(!bch2_keylist_empty(keys)); } @@ -1809,15 +1824,15 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t goto split; } - ret = bch2_btree_node_check_topology(trans, b); + + ret = bch2_btree_node_check_topology(trans, b) ?: + bch2_btree_insert_keys_interior(as, trans, path, b, + path->l[b->c.level].iter, keys); if (ret) { bch2_btree_node_unlock_write(trans, path, b); return ret; } - bch2_btree_insert_keys_interior(as, trans, path, b, - path->l[b->c.level].iter, keys); - trans_for_each_path_with_node(trans, b, linked, i) bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); From bbfe756dc3062c1e934f06e5ba39c239aa953b92 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Tue, 29 Apr 2025 01:09:33 +0200 Subject: [PATCH 227/559] fs/erofs/fileio: call erofs_onlinefolio_split() after bio_add_folio() If bio_add_folio() fails (because it is full), erofs_fileio_scan_folio() needs to submit the I/O request via erofs_fileio_rq_submit() and allocate a new I/O request with an empty `struct bio`. Then it retries the bio_add_folio() call. However, at this point, erofs_onlinefolio_split() has already been called which increments `folio->private`; the retry will call erofs_onlinefolio_split() again, but there will never be a matching erofs_onlinefolio_end() call. This leaves the folio locked forever and all waiters will be stuck in folio_wait_bit_common(). This bug has been added by commit ce63cb62d794 ("erofs: support unencoded inodes for fileio"), but was practically unreachable because there was room for 256 folios in the `struct bio` - until commit 9f74ae8c9ac9 ("erofs: shorten bvecs[] for file-backed mounts") which reduced the array capacity to 16 folios. It was now trivial to trigger the bug by manually invoking readahead from userspace, e.g.: posix_fadvise(fd, 0, st.st_size, POSIX_FADV_WILLNEED); This should be fixed by invoking erofs_onlinefolio_split() only after bio_add_folio() has succeeded. This is safe: asynchronous completions invoking erofs_onlinefolio_end() will not unlock the folio because erofs_fileio_scan_folio() is still holding a reference to be released by erofs_onlinefolio_end() at the end. Fixes: ce63cb62d794 ("erofs: support unencoded inodes for fileio") Fixes: 9f74ae8c9ac9 ("erofs: shorten bvecs[] for file-backed mounts") Cc: stable@vger.kernel.org Signed-off-by: Max Kellermann Reviewed-by: Gao Xiang Tested-by: Hongbo Li Link: https://lore.kernel.org/r/20250428230933.3422273-1-max.kellermann@ionos.com Signed-off-by: Gao Xiang --- fs/erofs/fileio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index 4fa0a0121288..60c7cc4c105c 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -150,10 +150,10 @@ io_retry: io->rq->bio.bi_iter.bi_sector = io->dev.m_pa >> 9; attached = 0; } - if (!attached++) - erofs_onlinefolio_split(folio); if (!bio_add_folio(&io->rq->bio, folio, len, cur)) goto io_retry; + if (!attached++) + erofs_onlinefolio_split(folio); io->dev.m_pa += len; } cur += len; From c1c9cad50c5c35cd4de1b54af59a28bf07451593 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Sun, 23 Mar 2025 05:49:06 -0700 Subject: [PATCH 228/559] drm/xe/svm: fix dereferencing error pointer in drm_gpusvm_range_alloc() xe_svm_range_alloc() returns ERR_PTR(-ENOMEM) on failure and there is a dereference of "range" after that: --> range->gpusvm = gpusvm; In xe_svm_range_alloc(), when memory allocation fails return NULL instead to handle this situation. Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/adaef4dd-5866-48ca-bc22-4a1ddef20381@stanley.mountain/ Signed-off-by: Harshit Mogalapalli Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250323124907.3946370-1-harshit.m.mogalapalli@oracle.com (cherry picked from commit 7a0322122cfdd9a6f10fc7701023d75c98eb3d22) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index f8c128524d9f..0b6547c06961 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -79,7 +79,7 @@ xe_svm_range_alloc(struct drm_gpusvm *gpusvm) range = kzalloc(sizeof(*range), GFP_KERNEL); if (!range) - return ERR_PTR(-ENOMEM); + return NULL; INIT_LIST_HEAD(&range->garbage_collector_link); xe_vm_get(gpusvm_to_vm(gpusvm)); From 5e639707ddb8f080fbde805a1bfa6668a1b45298 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 17 Apr 2025 12:52:12 -0700 Subject: [PATCH 229/559] drm/xe/guc: Fix capture of steering registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The list of registers to capture on a GPU hang includes some that require steering. Unfortunately, the flag to say this was being wiped to due a missing OR on the assignment of the next flag field. Fix that. Fixes: b170d696c1e2 ("drm/xe/guc: Add XE_LP steered register lists") Cc: Zhanjun Dong Cc: Alan Previn Cc: Matt Roper Cc: Lucas De Marchi Cc: "Thomas Hellström" Cc: Rodrigo Vivi Cc: intel-xe@lists.freedesktop.org Signed-off-by: John Harrison Reviewed-by: Matt Roper Reviewed-by: Zhanjun Dong Link: https://lore.kernel.org/r/20250417195215.3002210-2-John.C.Harrison@Intel.com (cherry picked from commit 532da44b54a10d50ebad14a8a02bd0b78ec23e8b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_capture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index f6d523e4c5fe..9095618648bc 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -359,7 +359,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, ext->reg = XE_REG(extlist->reg.__reg.addr); ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1); - ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); + ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id); ext->regname = extlist->name; } From b1852c5de2f2a37dd4462f7837c9e3e678f9e546 Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Mon, 21 Apr 2025 14:23:41 +0800 Subject: [PATCH 230/559] i2c: imx-lpi2c: Fix clock count when probe defers Deferred probe with pm_runtime_put() may delay clock disable, causing incorrect clock usage count. Use pm_runtime_put_sync() to ensure the clock is disabled immediately. Fixes: 13d6eb20fc79 ("i2c: imx-lpi2c: add runtime pm support") Signed-off-by: Clark Wang Signed-off-by: Carlos Song Cc: # v4.16+ Link: https://lore.kernel.org/r/20250421062341.2471922-1-carlos.song@nxp.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-imx-lpi2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 0d4b3935e687..342d47e67586 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -1380,9 +1380,9 @@ static int lpi2c_imx_probe(struct platform_device *pdev) return 0; rpm_disable: - pm_runtime_put(&pdev->dev); - pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); return ret; } From 12b8a672d2aa053064151659f49e7310674d42d3 Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Tue, 29 Apr 2025 09:32:29 +0530 Subject: [PATCH 231/559] pinctrl: qcom: Fix PINGROUP definition for sm8750 On newer SoCs intr_target_bit position is at 8 instead of 5. Fix it. Also add missing intr_wakeup_present_bit and intr_wakeup_enable_bit which enables forwarding of GPIO interrupts to parent PDC interrupt controller. Fixes: afe9803e3b82 ("pinctrl: qcom: Add sm8750 pinctrl driver") Signed-off-by: Maulik Shah Reviewed-by: Abel Vesa Reviewed-by: Dmitry Baryshkov Reviewed-by: Melody Olvera Link: https://lore.kernel.org/20250429-pinctrl_sm8750-v2-1-87d45dd3bd82@oss.qualcomm.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-sm8750.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8750.c b/drivers/pinctrl/qcom/pinctrl-sm8750.c index 1af11cd95fb0..b94fb4ee0ec3 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8750.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8750.c @@ -46,7 +46,9 @@ .out_bit = 1, \ .intr_enable_bit = 0, \ .intr_status_bit = 0, \ - .intr_target_bit = 5, \ + .intr_wakeup_present_bit = 6, \ + .intr_wakeup_enable_bit = 7, \ + .intr_target_bit = 8, \ .intr_target_kpss_val = 3, \ .intr_raw_status_bit = 4, \ .intr_polarity_bit = 1, \ From 730d837979bac203c786f2c5b0707f5426275c0d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 29 Apr 2025 10:29:36 +0800 Subject: [PATCH 232/559] selftests: ublk: fix UBLK_F_NEED_GET_DATA Commit 57e13a2e8cd2 ("selftests: ublk: support user recovery") starts to support UBLK_F_NEED_GET_DATA for covering recovery feature, however the ublk utility implementation isn't done correctly. Fix it by supporting UBLK_F_NEED_GET_DATA correctly. Also add test generic_07 for covering UBLK_F_NEED_GET_DATA. Reviewed-by: Caleb Sander Mateos Fixes: 57e13a2e8cd2 ("selftests: ublk: support user recovery") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250429022941.1718671-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 1 + tools/testing/selftests/ublk/kublk.c | 22 +++++++++------ tools/testing/selftests/ublk/kublk.h | 1 + .../testing/selftests/ublk/test_generic_07.sh | 28 +++++++++++++++++++ .../testing/selftests/ublk/test_stress_05.sh | 8 +++--- 5 files changed, 48 insertions(+), 12 deletions(-) create mode 100755 tools/testing/selftests/ublk/test_generic_07.sh diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index ec4624a283bc..f34ac0bac696 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -9,6 +9,7 @@ TEST_PROGS += test_generic_03.sh TEST_PROGS += test_generic_04.sh TEST_PROGS += test_generic_05.sh TEST_PROGS += test_generic_06.sh +TEST_PROGS += test_generic_07.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index e57a1486bb48..842b40736a9b 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -536,12 +536,17 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag) if (!(io->flags & UBLKSRV_IO_FREE)) return 0; - /* we issue because we need either fetching or committing */ + /* + * we issue because we need either fetching or committing or + * getting data + */ if (!(io->flags & - (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP))) + (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA))) return 0; - if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) + if (io->flags & UBLKSRV_NEED_GET_DATA) + cmd_op = UBLK_U_IO_NEED_GET_DATA; + else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ; else if (io->flags & UBLKSRV_NEED_FETCH_RQ) cmd_op = UBLK_U_IO_FETCH_REQ; @@ -658,6 +663,9 @@ static void ublk_handle_cqe(struct io_uring *r, assert(tag < q->q_depth); if (q->tgt_ops->queue_io) q->tgt_ops->queue_io(q, tag); + } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { + io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE; + ublk_queue_io_cmd(q, io, tag); } else { /* * COMMIT_REQ will be completed immediately since no fetching @@ -1237,7 +1245,7 @@ static void __cmd_create_help(char *exe, bool recovery) printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", exe, recovery ? "recover" : "add"); - printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g 0|1]\n"); + printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n"); printf("\t[-e 0|1 ] [-i 0|1]\n"); printf("\t[target options] [backfile1] [backfile2] ...\n"); printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); @@ -1313,7 +1321,7 @@ int main(int argc, char *argv[]) opterr = 0; optind = 2; - while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:az", + while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:gaz", longopts, &option_idx)) != -1) { switch (opt) { case 'a': @@ -1351,9 +1359,7 @@ int main(int argc, char *argv[]) ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE; break; case 'g': - value = strtol(optarg, NULL, 10); - if (value) - ctx.flags |= UBLK_F_NEED_GET_DATA; + ctx.flags |= UBLK_F_NEED_GET_DATA; break; case 0: if (!strcmp(longopts[option_idx].name, "debug_mask")) diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 918db5cd633f..44ee1e4ac55b 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -115,6 +115,7 @@ struct ublk_io { #define UBLKSRV_NEED_FETCH_RQ (1UL << 0) #define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) #define UBLKSRV_IO_FREE (1UL << 2) +#define UBLKSRV_NEED_GET_DATA (1UL << 3) unsigned short flags; unsigned short refs; /* used by target code only */ diff --git a/tools/testing/selftests/ublk/test_generic_07.sh b/tools/testing/selftests/ublk/test_generic_07.sh new file mode 100755 index 000000000000..cba86451fa5e --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_07.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_07" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_NEED_GET_DATA" + +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -q 2 -g "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? +if [ "$ERR_CODE" -eq 0 ]; then + _mkfs_mount_test /dev/ublkb"${dev_id}" + ERR_CODE=$? +fi + +_cleanup_test "generic" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh index a7071b10224d..88601b48f1cd 100755 --- a/tools/testing/selftests/ublk/test_stress_05.sh +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -47,15 +47,15 @@ _create_backfile 0 256M _create_backfile 1 256M for reissue in $(seq 0 1); do - ublk_io_and_remove 8G -t null -q 4 -g 1 -r 1 -i "$reissue" & - ublk_io_and_remove 256M -t loop -q 4 -g 1 -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & + ublk_io_and_remove 8G -t null -q 4 -g -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & wait done if _have_feature "ZERO_COPY"; then for reissue in $(seq 0 1); do - ublk_io_and_remove 8G -t null -q 4 -g 1 -z -r 1 -i "$reissue" & - ublk_io_and_remove 256M -t loop -q 4 -g 1 -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + ublk_io_and_remove 8G -t null -q 4 -g -z -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & wait done fi From 69edf98be844375807f299397c516fb1e962b3cc Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 29 Apr 2025 10:29:37 +0800 Subject: [PATCH 233/559] ublk: decouple zero copy from user copy UBLK_F_USER_COPY and UBLK_F_SUPPORT_ZERO_COPY are two different features, and shouldn't be coupled together. Commit 1f6540e2aabb ("ublk: zc register/unregister bvec") enables user copy automatically in case of UBLK_F_SUPPORT_ZERO_COPY, this way isn't correct. So decouple zero copy from user copy, and use independent helper to check each one. Fixes: 1f6540e2aabb ("ublk: zc register/unregister bvec") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250429022941.1718671-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 40f971a66d3e..0a3a3c64316d 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -205,11 +205,6 @@ static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, static inline unsigned int ublk_req_build_flags(struct request *req); static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, int tag); -static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub) -{ - return ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY); -} - static inline bool ublk_dev_is_zoned(const struct ublk_device *ub) { return ub->dev_info.flags & UBLK_F_ZONED; @@ -609,14 +604,19 @@ static void ublk_apply_params(struct ublk_device *ub) ublk_dev_param_zoned_apply(ub); } +static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq) +{ + return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY; +} + static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) { - return ubq->flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY); + return ubq->flags & UBLK_F_USER_COPY; } static inline bool ublk_need_map_io(const struct ublk_queue *ubq) { - return !ublk_support_user_copy(ubq); + return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq); } static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) @@ -624,8 +624,11 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) /* * read()/write() is involved in user copy, so request reference * has to be grabbed + * + * for zero copy, request buffer need to be registered to io_uring + * buffer table, so reference is needed */ - return ublk_support_user_copy(ubq); + return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq); } static inline void ublk_init_req_ref(const struct ublk_queue *ubq, @@ -2245,6 +2248,9 @@ static struct request *ublk_check_and_get_req(struct kiocb *iocb, if (!ubq) return ERR_PTR(-EINVAL); + if (!ublk_support_user_copy(ubq)) + return ERR_PTR(-EACCES); + if (tag >= ubq->q_depth) return ERR_PTR(-EINVAL); @@ -2783,13 +2789,18 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE | UBLK_F_URING_CMD_COMP_IN_TASK; - /* GET_DATA isn't needed any more with USER_COPY */ - if (ublk_dev_is_user_copy(ub)) + /* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */ + if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)) ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA; - /* Zoned storage support requires user copy feature */ + /* + * Zoned storage support requires reuse `ublksrv_io_cmd->addr` for + * returning write_append_lba, which is only allowed in case of + * user copy or zero copy + */ if (ublk_dev_is_zoned(ub) && - (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !ublk_dev_is_user_copy(ub))) { + (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !(ub->dev_info.flags & + (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)))) { ret = -EINVAL; goto out_free_dev_number; } From 6240f43b29f285a40eebeb789756673af7a7d67c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 29 Apr 2025 10:29:38 +0800 Subject: [PATCH 234/559] ublk: enhance check for register/unregister io buffer command The simple check of UBLK_IO_FLAG_OWNED_BY_SRV can avoid incorrect register/unregister io buffer easily, so check it before calling starting to register/un-register io buffer. Also only allow io buffer register/unregister uring_cmd in case of UBLK_F_SUPPORT_ZERO_COPY. Also mark argument 'ublk_queue *' of ublk_register_io_buf as const. Reviewed-by: Caleb Sander Mateos Fixes: 1f6540e2aabb ("ublk: zc register/unregister bvec") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250429022941.1718671-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 0a3a3c64316d..c624d8f653ae 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -201,7 +201,7 @@ struct ublk_params_header { static void ublk_stop_dev_unlocked(struct ublk_device *ub); static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - struct ublk_queue *ubq, int tag, size_t offset); + const struct ublk_queue *ubq, int tag, size_t offset); static inline unsigned int ublk_req_build_flags(struct request *req); static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, int tag); @@ -1949,13 +1949,20 @@ static void ublk_io_release(void *priv) } static int ublk_register_io_buf(struct io_uring_cmd *cmd, - struct ublk_queue *ubq, unsigned int tag, + const struct ublk_queue *ubq, unsigned int tag, unsigned int index, unsigned int issue_flags) { struct ublk_device *ub = cmd->file->private_data; + const struct ublk_io *io = &ubq->ios[tag]; struct request *req; int ret; + if (!ublk_support_zero_copy(ubq)) + return -EINVAL; + + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) + return -EINVAL; + req = __ublk_check_and_get_req(ub, ubq, tag, 0); if (!req) return -EINVAL; @@ -1971,8 +1978,17 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd, } static int ublk_unregister_io_buf(struct io_uring_cmd *cmd, + const struct ublk_queue *ubq, unsigned int tag, unsigned int index, unsigned int issue_flags) { + const struct ublk_io *io = &ubq->ios[tag]; + + if (!ublk_support_zero_copy(ubq)) + return -EINVAL; + + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) + return -EINVAL; + return io_buffer_unregister_bvec(cmd, index, issue_flags); } @@ -2076,7 +2092,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, case UBLK_IO_REGISTER_IO_BUF: return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags); case UBLK_IO_UNREGISTER_IO_BUF: - return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags); + return ublk_unregister_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags); case UBLK_IO_FETCH_REQ: ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr); if (ret) @@ -2128,7 +2144,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, } static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - struct ublk_queue *ubq, int tag, size_t offset) + const struct ublk_queue *ubq, int tag, size_t offset) { struct request *req; From a584b2630b0d31f8a20e4ccb4de370b160177b8a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 29 Apr 2025 10:29:39 +0800 Subject: [PATCH 235/559] ublk: remove the check of ublk_need_req_ref() from __ublk_check_and_get_req __ublk_check_and_get_req() is only called from ublk_check_and_get_req() and ublk_register_io_buf(), the same check has been covered in the two calling sites. So remove the check from __ublk_check_and_get_req(). Suggested-by: Caleb Sander Mateos Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250429022941.1718671-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index c624d8f653ae..f9032076bc06 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2148,9 +2148,6 @@ static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, { struct request *req; - if (!ublk_need_req_ref(ubq)) - return NULL; - req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); if (!req) return NULL; From 56f1f30e6795b890463d9b20b11e576adf5a2f77 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 29 Apr 2025 14:48:41 +0200 Subject: [PATCH 236/559] ALSA: ump: Fix buffer overflow at UMP SysEx message conversion The conversion function from MIDI 1.0 to UMP packet contains an internal buffer to keep the incoming MIDI bytes, and its size is 4, as it was supposed to be the max size for a MIDI1 UMP packet data. However, the implementation overlooked that SysEx is handled in a different format, and it can be up to 6 bytes, as found in do_convert_to_ump(). It leads eventually to a buffer overflow, and may corrupt the memory when a longer SysEx message is received. The fix is simply to extend the buffer size to 6 to fit with the SysEx UMP message. Fixes: 0b5288f5fe63 ("ALSA: ump: Add legacy raw MIDI support") Reported-by: Argusee Link: https://patch.msgid.link/20250429124845.25128-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/ump_convert.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/ump_convert.h b/include/sound/ump_convert.h index d099ae27f849..682499b871ea 100644 --- a/include/sound/ump_convert.h +++ b/include/sound/ump_convert.h @@ -19,7 +19,7 @@ struct ump_cvt_to_ump_bank { /* context for converting from MIDI1 byte stream to UMP packet */ struct ump_cvt_to_ump { /* MIDI1 intermediate buffer */ - unsigned char buf[4]; + unsigned char buf[6]; /* up to 6 bytes for SysEx */ int len; int cmd_bytes; From a75401227eeb827b1a162df1aa9d5b33da921c43 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 24 Apr 2025 10:18:01 -0700 Subject: [PATCH 237/559] nvme-pci: fix queue unquiesce check on slot_reset A zero return means the reset was successfully scheduled. We don't want to unquiesce the queues while the reset_work is pending, as that will just flush out requeued requests to a failed completion. Fixes: 71a5bb153be104 ("nvme: ensure disabling pairs with unquiesce") Reported-by: Dhankaran Singh Ajravat Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b178d52eac1b..c9e2a5450bc0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3575,7 +3575,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); - if (!nvme_try_sched_reset(&dev->ctrl)) + if (nvme_try_sched_reset(&dev->ctrl)) nvme_unquiesce_io_queues(&dev->ctrl); return PCI_ERS_RESULT_RECOVERED; } From 5b960f92ac3e5b4d7f60a506a6b6735eead1da01 Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Tue, 22 Apr 2025 20:17:25 +0800 Subject: [PATCH 238/559] nvme-pci: add quirks for device 126f:1001 This commit adds NVME_QUIRK_NO_DEEPEST_PS and NVME_QUIRK_BOGUS_NID for device [126f:1001]. It is similar to commit e89086c43f05 ("drivers/nvme: Add quirks for device 126f:2262") Diff is according the dmesg, use NVME_QUIRK_IGNORE_DEV_SUBNQN. dmesg | grep -i nvme0: nvme nvme0: pci function 0000:01:00.0 nvme nvme0: missing or invalid SUBNQN field. nvme nvme0: 12/0/0 default/read/poll queues Link:https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e89086c43f0500bc7c4ce225495b73b8ce234c1f Signed-off-by: Wentao Guan Signed-off-by: WangYuli Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c9e2a5450bc0..a8d4443edc49 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3623,6 +3623,9 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1217, 0x8760), /* O2 Micro 64GB Steam Deck */ .driver_data = NVME_QUIRK_DMAPOOL_ALIGN_512, }, + { PCI_DEVICE(0x126f, 0x1001), /* Silicon Motion generic */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x126f, 0x2262), /* Silicon Motion generic */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_BOGUS_NID, }, From ab35ad950d439ec3409509835d229b3d93d3c7f9 Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Thu, 24 Apr 2025 10:40:10 +0800 Subject: [PATCH 239/559] nvme-pci: add quirks for WDC Blue SN550 15b7:5009 Add two quirks for the WDC Blue SN550 (PCI ID 15b7:5009) based on user reports and hardware analysis: - NVME_QUIRK_NO_DEEPEST_PS: liaozw talked to me the problem and solved with nvme_core.default_ps_max_latency_us=0, so add the quirk. I also found some reports in the following link. - NVME_QUIRK_BROKEN_MSI: after get the lspci from Jack Rio. I think that the disk also have NVME_QUIRK_BROKEN_MSI. described in commit d5887dc6b6c0 ("nvme-pci: Add quirk for broken MSIs") as sean said in link which match the MSI 1/32 and MSI-X 17. Log: lspci -nn | grep -i memory 03:00.0 Non-Volatile memory controller [0108]: Sandisk Corp SanDisk Ultra 3D / WD PC SN530, IX SN530, Blue SN550 NVMe SSD (DRAM-less) [15b7:5009] (rev 01) lspci -v -d 15b7:5009 03:00.0 Non-Volatile memory controller: Sandisk Corp SanDisk Ultra 3D / WD PC SN530, IX SN530, Blue SN550 NVMe SSD (DRAM-less) (rev 01) (prog-if 02 [NVM Express]) Subsystem: Sandisk Corp WD Blue SN550 NVMe SSD Flags: bus master, fast devsel, latency 0, IRQ 35, IOMMU group 10 Memory at fe800000 (64-bit, non-prefetchable) [size=16K] Memory at fe804000 (64-bit, non-prefetchable) [size=256] Capabilities: [80] Power Management version 3 Capabilities: [90] MSI: Enable- Count=1/32 Maskable- 64bit+ Capabilities: [b0] MSI-X: Enable+ Count=17 Masked- Capabilities: [c0] Express Endpoint, MSI 00 Capabilities: [100] Advanced Error Reporting Capabilities: [150] Device Serial Number 00-00-00-00-00-00-00-00 Capabilities: [1b8] Latency Tolerance Reporting Capabilities: [300] Secondary PCI Express Capabilities: [900] L1 PM Substates Kernel driver in use: nvme dmesg | grep nvme [ 0.000000] Command line: BOOT_IMAGE=/vmlinuz-6.12.20-amd64-desktop-rolling root=UUID= ro splash quiet nvme_core.default_ps_max_latency_us=0 DEEPIN_GFXMODE= [ 0.059301] Kernel command line: BOOT_IMAGE=/vmlinuz-6.12.20-amd64-desktop-rolling root=UUID= ro splash quiet nvme_core.default_ps_max_latency_us=0 DEEPIN_GFXMODE= [ 0.542430] nvme nvme0: pci function 0000:03:00.0 [ 0.560426] nvme nvme0: allocated 32 MiB host memory buffer. [ 0.562491] nvme nvme0: 16/0/0 default/read/poll queues [ 0.567764] nvme0n1: p1 p2 p3 p4 p5 p6 p7 p8 p9 [ 6.388726] EXT4-fs (nvme0n1p7): mounted filesystem ro with ordered data mode. Quota mode: none. [ 6.893421] EXT4-fs (nvme0n1p7): re-mounted r/w. Quota mode: none. [ 7.125419] Adding 16777212k swap on /dev/nvme0n1p8. Priority:-2 extents:1 across:16777212k SS [ 7.157588] EXT4-fs (nvme0n1p6): mounted filesystem r/w with ordered data mode. Quota mode: none. [ 7.165021] EXT4-fs (nvme0n1p9): mounted filesystem r/w with ordered data mode. Quota mode: none. [ 8.036932] nvme nvme0: using unchecked data buffer [ 8.096023] block nvme0n1: No UUID available providing old NGUID Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d5887dc6b6c054d0da3cd053afc15b7be1f45ff6 Link: https://lore.kernel.org/all/20240422162822.3539156-1-sean.anderson@linux.dev/ Reported-by: liaozw Closes: https://bbs.deepin.org.cn/post/286300 Reported-by: rugk Closes: https://bugzilla.kernel.org/show_bug.cgi?id=208123 Signed-off-by: Wentao Guan Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a8d4443edc49..2e30e9be7408 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3649,6 +3649,9 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x15b7, 0x5008), /* Sandisk SN530 */ .driver_data = NVME_QUIRK_BROKEN_MSI }, + { PCI_DEVICE(0x15b7, 0x5009), /* Sandisk SN550 */ + .driver_data = NVME_QUIRK_BROKEN_MSI | + NVME_QUIRK_NO_DEEPEST_PS }, { PCI_DEVICE(0x1987, 0x5012), /* Phison E12 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ From 48ccf21fa8dc595c8aa4f1d347b593dcae0727d0 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 8 Apr 2025 16:07:58 +0200 Subject: [PATCH 240/559] drm/tests: shmem: Fix memleak The drm_gem_shmem_test_get_pages_sgt() gets a scatter-gather table using the drm_gem_shmem_get_sg_table() function and rightfully calls sg_free_table() on it. However, it's also supposed to kfree() the returned sg_table, but doesn't. This leads to a memory leak, reported by kmemleak. Fix it by adding a kunit action to kfree the sgt when the test ends. Reported-by: Philipp Stanner Closes: https://lore.kernel.org/dri-devel/a7655158a6367ac46194d57f4b7433ef0772a73e.camel@mailbox.org/ Fixes: 93032ae634d4 ("drm/test: add a test suite for GEM objects backed by shmem") Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250408140758.1831333-1-mripard@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_gem_shmem_test.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c index fd4215e2f982..925fbc2cda70 100644 --- a/drivers/gpu/drm/tests/drm_gem_shmem_test.c +++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c @@ -216,6 +216,9 @@ static void drm_gem_shmem_test_get_pages_sgt(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt); KUNIT_EXPECT_NULL(test, shmem->sgt); + ret = kunit_add_action_or_reset(test, kfree_wrapper, sgt); + KUNIT_ASSERT_EQ(test, ret, 0); + ret = kunit_add_action_or_reset(test, sg_free_table_wrapper, sgt); KUNIT_ASSERT_EQ(test, ret, 0); From 1a8bc0fe8039e1e57f68c4a588f0403d98bfeb1f Mon Sep 17 00:00:00 2001 From: Russell Cloran Date: Mon, 14 Apr 2025 22:32:59 -0700 Subject: [PATCH 241/559] drm/mipi-dbi: Fix blanking for non-16 bit formats On r6x2b6x2g6x2 displays not enough blank data is sent to blank the entire screen. When support for these displays was added, the dirty function was updated to handle the different amount of data, but blanking was not, and remained hardcoded as 2 bytes per pixel. This change applies almost the same algorithm used in the dirty function to the blank function, but there is no fb available at that point, and no concern about having to transform any data, so the dbidev pixel format is always used for calculating the length. Fixes: 4aebb79021f3 ("drm/mipi-dbi: Add support for DRM_FORMAT_RGB888") Signed-off-by: Russell Cloran Link: https://lore.kernel.org/r/20250415053259.79572-1-rcloran@gmail.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/drm_mipi_dbi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index 89e05a5bed1d..a4cd476f9b30 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -404,12 +404,16 @@ static void mipi_dbi_blank(struct mipi_dbi_dev *dbidev) u16 height = drm->mode_config.min_height; u16 width = drm->mode_config.min_width; struct mipi_dbi *dbi = &dbidev->dbi; - size_t len = width * height * 2; + const struct drm_format_info *dst_format; + size_t len; int idx; if (!drm_dev_enter(drm, &idx)) return; + dst_format = drm_format_info(dbidev->pixel_format); + len = drm_format_info_min_pitch(dst_format, 0, width) * height; + memset(dbidev->tx_buf, 0, len); mipi_dbi_set_window_address(dbidev, 0, width - 1, 0, height - 1); From de2b2107d5a41a91ab603e135fb6e408abbee28e Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:06:58 +0200 Subject: [PATCH 242/559] arm64: dts: st: Adjust interrupt-controller for stm32mp25 SoCs Use gic-400 compatible and remove address-cells = <1> on aarch64 Fixes: 5d30d03aaf785 ("arm64: dts: st: introduce stm32mp25 SoCs family") Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-2-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp251.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp251.dtsi b/arch/arm64/boot/dts/st/stm32mp251.dtsi index f3c6cdfd7008..379e290313dc 100644 --- a/arch/arm64/boot/dts/st/stm32mp251.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp251.dtsi @@ -115,9 +115,8 @@ }; intc: interrupt-controller@4ac00000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; #interrupt-cells = <3>; - #address-cells = <1>; interrupt-controller; reg = <0x0 0x4ac10000 0x0 0x1000>, <0x0 0x4ac20000 0x0 0x2000>, From 06c231fe953a26f4bc9d7a37ba1b9b288a59c7c2 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:06:59 +0200 Subject: [PATCH 243/559] arm64: dts: st: Use 128kB size for aliased GIC400 register access on stm32mp25 SoCs Adjust the size of 8kB GIC regions to 128kB so that each 4kB is mapped 16 times over a 64kB region. The offset is then adjusted in the irq-gic driver. see commit 12e14066f4835 ("irqchip/GIC: Add workaround for aliased GIC400") Fixes: 5d30d03aaf785 ("arm64: dts: st: introduce stm32mp25 SoCs family") Suggested-by: Marc Zyngier Signed-off-by: Christian Bruel Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20250415111654.2103767-3-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp251.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp251.dtsi b/arch/arm64/boot/dts/st/stm32mp251.dtsi index 379e290313dc..87110f91e489 100644 --- a/arch/arm64/boot/dts/st/stm32mp251.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp251.dtsi @@ -119,9 +119,9 @@ #interrupt-cells = <3>; interrupt-controller; reg = <0x0 0x4ac10000 0x0 0x1000>, - <0x0 0x4ac20000 0x0 0x2000>, - <0x0 0x4ac40000 0x0 0x2000>, - <0x0 0x4ac60000 0x0 0x2000>; + <0x0 0x4ac20000 0x0 0x20000>, + <0x0 0x4ac40000 0x0 0x20000>, + <0x0 0x4ac60000 0x0 0x20000>; }; psci { From 02dc83f09c7262533123e7e4dae9c4f9fc40ed17 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:07:00 +0200 Subject: [PATCH 244/559] arm64: dts: st: Adjust interrupt-controller for stm32mp21 SoCs Use gic-400 compatible for aarch64 Fixes: 7a57b1bb1afbf ("arm64: dts: st: introduce stm32mp21 SoCs family") Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-4-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp211.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/st/stm32mp211.dtsi b/arch/arm64/boot/dts/st/stm32mp211.dtsi index 6dd1377f3e1d..52a8209471b8 100644 --- a/arch/arm64/boot/dts/st/stm32mp211.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp211.dtsi @@ -116,7 +116,7 @@ }; intc: interrupt-controller@4ac10000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; reg = <0x4ac10000 0x0 0x1000>, <0x4ac20000 0x0 0x2000>, <0x4ac40000 0x0 0x2000>, From 1bc229e9bb9cd502caeec639cb3cff50aea078f0 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:07:01 +0200 Subject: [PATCH 245/559] arm64: dts: st: Use 128kB size for aliased GIC400 register access on stm32mp21 SoCs Adjust the size of 8kB GIC regions to 128kB so that each 4kB is mapped 16 times over a 64kB region. The offset is then adjusted in the irq-gic driver. see commit 12e14066f4835 ("irqchip/GIC: Add workaround for aliased GIC400") Fixes: 7a57b1bb1afbf ("arm64: dts: st: introduce stm32mp21 SoCs family") Suggested-by: Marc Zyngier Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-5-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp211.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp211.dtsi b/arch/arm64/boot/dts/st/stm32mp211.dtsi index 52a8209471b8..bf888d60cd4f 100644 --- a/arch/arm64/boot/dts/st/stm32mp211.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp211.dtsi @@ -118,9 +118,9 @@ intc: interrupt-controller@4ac10000 { compatible = "arm,gic-400"; reg = <0x4ac10000 0x0 0x1000>, - <0x4ac20000 0x0 0x2000>, - <0x4ac40000 0x0 0x2000>, - <0x4ac60000 0x0 0x2000>; + <0x4ac20000 0x0 0x20000>, + <0x4ac40000 0x0 0x20000>, + <0x4ac60000 0x0 0x20000>; #interrupt-cells = <3>; interrupt-controller; }; From 3a1e1082097b8fa2e5d420de105f4cce804c38b2 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:07:02 +0200 Subject: [PATCH 246/559] arm64: dts: st: Adjust interrupt-controller for stm32mp23 SoCs Use gic-400 compatible and remove address-cells = <1> for aarch64 Fixes: e9b03ef21386e ("arm64: dts: st: introduce stm32mp23 SoCs family") Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-6-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp231.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp231.dtsi b/arch/arm64/boot/dts/st/stm32mp231.dtsi index 8820d219a33e..3f73cf1d443d 100644 --- a/arch/arm64/boot/dts/st/stm32mp231.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp231.dtsi @@ -1201,13 +1201,12 @@ }; intc: interrupt-controller@4ac10000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; reg = <0x4ac10000 0x1000>, <0x4ac20000 0x2000>, <0x4ac40000 0x2000>, <0x4ac60000 0x2000>; #interrupt-cells = <3>; - #address-cells = <1>; interrupt-controller; }; }; From 2ef5c66cba6171feab05e62e1b22df970b238544 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:07:03 +0200 Subject: [PATCH 247/559] arm64: dts: st: Use 128kB size for aliased GIC400 register access on stm32mp23 SoCs Adjust the size of 8kB GIC regions to 128kB so that each 4kB is mapped 16 times over a 64kB region. The offset is then adjusted in the irq-gic driver. see commit 12e14066f4835 ("irqchip/GIC: Add workaround for aliased GIC400") Fixes: e9b03ef21386e ("arm64: dts: st: introduce stm32mp23 SoCs family") Suggested-by: Marc Zyngier Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-7-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp231.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp231.dtsi b/arch/arm64/boot/dts/st/stm32mp231.dtsi index 3f73cf1d443d..75697acd1345 100644 --- a/arch/arm64/boot/dts/st/stm32mp231.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp231.dtsi @@ -1203,9 +1203,9 @@ intc: interrupt-controller@4ac10000 { compatible = "arm,gic-400"; reg = <0x4ac10000 0x1000>, - <0x4ac20000 0x2000>, - <0x4ac40000 0x2000>, - <0x4ac60000 0x2000>; + <0x4ac20000 0x20000>, + <0x4ac40000 0x20000>, + <0x4ac60000 0x20000>; #interrupt-cells = <3>; interrupt-controller; }; From 11cdb506d0fbf5ac05bf55f5afcb3a215c316490 Mon Sep 17 00:00:00 2001 From: Gary Bisson Date: Tue, 29 Apr 2025 09:16:29 -0700 Subject: [PATCH 248/559] Input: mtk-pmic-keys - fix possible null pointer dereference In mtk_pmic_keys_probe, the regs parameter is only set if the button is parsed in the device tree. However, on hardware where the button is left floating, that node will most likely be removed not to enable that input. In that case the code will try to dereference a null pointer. Let's use the regs struct instead as it is defined for all supported platforms. Note that it is ok setting the key reg even if that latter is disabled as the interrupt won't be enabled anyway. Fixes: b581acb49aec ("Input: mtk-pmic-keys - transfer per-key bit in mtk_pmic_keys_regs") Signed-off-by: Gary Bisson Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/mtk-pmic-keys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/keyboard/mtk-pmic-keys.c b/drivers/input/keyboard/mtk-pmic-keys.c index 5ad6be914160..061d48350df6 100644 --- a/drivers/input/keyboard/mtk-pmic-keys.c +++ b/drivers/input/keyboard/mtk-pmic-keys.c @@ -147,8 +147,8 @@ static void mtk_pmic_keys_lp_reset_setup(struct mtk_pmic_keys *keys, u32 value, mask; int error; - kregs_home = keys->keys[MTK_PMIC_HOMEKEY_INDEX].regs; - kregs_pwr = keys->keys[MTK_PMIC_PWRKEY_INDEX].regs; + kregs_home = ®s->keys_regs[MTK_PMIC_HOMEKEY_INDEX]; + kregs_pwr = ®s->keys_regs[MTK_PMIC_PWRKEY_INDEX]; error = of_property_read_u32(keys->dev->of_node, "power-off-time-sec", &long_press_debounce); From 0759e77a6d9bd34a874da73721ce4a7dc6665023 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 29 Apr 2025 20:36:15 +0200 Subject: [PATCH 249/559] ALSA: usb-audio: Fix duplicated name in MIDI substream names The MIDI substream name string is constructed from the combination of the card shortname (which is taken from USB iProduct) and the USB iJack. The problem is that some devices put the product name to the iJack field, too. For example, aplaymidi -l output on the Lanchkey MK 49 are like: % aplaymidi -l Port Client name Port name 44:0 Launchkey MK4 49 Launchkey MK4 49 Launchkey MK4 44:1 Launchkey MK4 49 Launchkey MK4 49 Launchkey MK4 where the actual iJack name can't be seen because it's truncated due to the doubly words. For resolving those situations, this patch compares the iJack string with the card shortname, and drops if both start with the same words. Then the result becomes like: % aplaymidi -l Port Client name Port name 40:0 Launchkey MK4 49 Launchkey MK4 49 MIDI In 40:1 Launchkey MK4 49 Launchkey MK4 49 DAW In A caveat is that there are some pre-defined names for certain devices in the driver code, and this workaround shouldn't be applied to them. Similarly, when the iJack isn't specified, we should skip this check, too. The patch added those checks in addition to the string comparison. Suggested-by: Paul Davis Tested-by: Paul Davis Link: https://lore.kernel.org/CAFa_cKmEDQWcJatbYWi6A58Zg4Ma9_6Nr3k5LhqwyxC-P_kXtw@mail.gmail.com Link: https://patch.msgid.link/20250429183626.20773-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/midi.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index dcdd7e9e1ae9..cfed000f243a 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1885,10 +1885,18 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi, } port_info = find_port_info(umidi, number); - name_format = port_info ? port_info->name : - (jack_name != default_jack_name ? "%s %s" : "%s %s %d"); - snprintf(substream->name, sizeof(substream->name), - name_format, umidi->card->shortname, jack_name, number + 1); + if (port_info || jack_name == default_jack_name || + strncmp(umidi->card->shortname, jack_name, strlen(umidi->card->shortname)) != 0) { + name_format = port_info ? port_info->name : + (jack_name != default_jack_name ? "%s %s" : "%s %s %d"); + snprintf(substream->name, sizeof(substream->name), + name_format, umidi->card->shortname, jack_name, number + 1); + } else { + /* The manufacturer included the iProduct name in the jack + * name, do not use both + */ + strscpy(substream->name, jack_name); + } *rsubstream = substream; } From e7e5ae71831c44d58627a991e603845a2fed2cab Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 25 Apr 2025 16:50:47 +0100 Subject: [PATCH 250/559] net: dlink: Correct endianness handling of led_mode As it's name suggests, parse_eeprom() parses EEPROM data. This is done by reading data, 16 bits at a time as follows: for (i = 0; i < 128; i++) ((__le16 *) sromdata)[i] = cpu_to_le16(read_eeprom(np, i)); sromdata is at the same memory location as psrom. And the type of psrom is a pointer to struct t_SROM. As can be seen in the loop above, data is stored in sromdata, and thus psrom, as 16-bit little-endian values. However, the integer fields of t_SROM are host byte order integers. And in the case of led_mode this leads to a little endian value being incorrectly treated as host byte order. Looking at rio_set_led_mode, this does appear to be a bug as that code masks led_mode with 0x1, 0x2 and 0x8. Logic that would be effected by a reversed byte order. This problem would only manifest on big endian hosts. Found by inspection while investigating a sparse warning regarding the crc field of t_SROM. I believe that warning is a false positive. And although I plan to send a follow-up to use little-endian types for other the integer fields of PSROM_t I do not believe that will involve any bug fixes. Compile tested only. Fixes: c3f45d322cbd ("dl2k: Add support for IP1000A-based cards") Signed-off-by: Simon Horman Link: https://patch.msgid.link/20250425-dlink-led-mode-v1-1-6bae3c36e736@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/dlink/dl2k.c | 2 +- drivers/net/ethernet/dlink/dl2k.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index d88fbecdab4b..232e839a9d07 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -352,7 +352,7 @@ parse_eeprom (struct net_device *dev) eth_hw_addr_set(dev, psrom->mac_addr); if (np->chip_id == CHIP_IP1000A) { - np->led_mode = psrom->led_mode; + np->led_mode = le16_to_cpu(psrom->led_mode); return 0; } diff --git a/drivers/net/ethernet/dlink/dl2k.h b/drivers/net/ethernet/dlink/dl2k.h index 195dc6cfd895..0e33e2eaae96 100644 --- a/drivers/net/ethernet/dlink/dl2k.h +++ b/drivers/net/ethernet/dlink/dl2k.h @@ -335,7 +335,7 @@ typedef struct t_SROM { u16 sub_system_id; /* 0x06 */ u16 pci_base_1; /* 0x08 (IP1000A only) */ u16 pci_base_2; /* 0x0a (IP1000A only) */ - u16 led_mode; /* 0x0c (IP1000A only) */ + __le16 led_mode; /* 0x0c (IP1000A only) */ u16 reserved1[9]; /* 0x0e-0x1f */ u8 mac_addr[6]; /* 0x20-0x25 */ u8 reserved2[10]; /* 0x26-0x2f */ From b23285e93bef729e67519a5209d5b7fde3b4af50 Mon Sep 17 00:00:00 2001 From: Da Xue Date: Fri, 25 Apr 2025 15:20:09 -0400 Subject: [PATCH 251/559] net: mdio: mux-meson-gxl: set reversed bit when using internal phy This bit is necessary to receive packets from the internal PHY. Without this bit set, no activity occurs on the interface. Normally u-boot sets this bit, but if u-boot is compiled without net support, the interface will be up but without any activity. If bit is set once, it will work until the IP is powered down or reset. The vendor SDK sets this bit along with the PHY_ID bits. Signed-off-by: Da Xue Fixes: 9a24e1ff4326 ("net: mdio: add amlogic gxl mdio mux support") Link: https://patch.msgid.link/20250425192009.1439508-1-da@libre.computer Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-mux-meson-gxl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/mdio/mdio-mux-meson-gxl.c b/drivers/net/mdio/mdio-mux-meson-gxl.c index 00c66240136b..3dd12a8c8b03 100644 --- a/drivers/net/mdio/mdio-mux-meson-gxl.c +++ b/drivers/net/mdio/mdio-mux-meson-gxl.c @@ -17,6 +17,7 @@ #define REG2_LEDACT GENMASK(23, 22) #define REG2_LEDLINK GENMASK(25, 24) #define REG2_DIV4SEL BIT(27) +#define REG2_REVERSED BIT(28) #define REG2_ADCBYPASS BIT(30) #define REG2_CLKINSEL BIT(31) #define ETH_REG3 0x4 @@ -65,7 +66,7 @@ static void gxl_enable_internal_mdio(struct gxl_mdio_mux *priv) * The only constraint is that it must match the one in * drivers/net/phy/meson-gxl.c to properly match the PHY. */ - writel(FIELD_PREP(REG2_PHYID, EPHY_GXL_ID), + writel(REG2_REVERSED | FIELD_PREP(REG2_PHYID, EPHY_GXL_ID), priv->regs + ETH_REG2); /* Enable the internal phy */ From 8988c4b91945173a6b5505764915d470f0238fdc Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 29 Apr 2025 15:22:18 +0100 Subject: [PATCH 252/559] perf tools: Fix in-source libperf build When libperf is built alone in-source, $(OUTPUT) isn't set. This causes the generated uapi path to resolve to '/../arch' which results in a permissions error: mkdir: cannot create directory '/../arch': Permission denied Fix it by removing the preceding '/..' which means that it gets generated either in the tools/lib/perf part of the tree or the OUTPUT folder. Some other rules that rely on OUTPUT further refine this conditionally depending on whether it's an in-source or out-of-source build, but I don't think we need the extra complexity here. And this rule is slightly different to others because the header is needed by both libperf and Perf. This is further complicated by the fact that Perf always passes O=... to libperf even for in source builds, meaning that OUTPUT isn't set consistently between projects. Because we're no longer going one level up to try to generate the file in the tools/ folder, Perf's include rule needs to descend into libperf. Also fix the clean rule while we're here. Reported-by: Thorsten Leemhuis Closes: https://lore.kernel.org/linux-perf-users/7703f88e-ccb7-4c98-9da4-8aad224e780f@leemhuis.info/ Fixes: bfb713ea53c7 ("perf tools: Fix arm64 build by generating unistd_64.h") Signed-off-by: James Clark Tested-by: Thorsten Leemhuis Link: https://lore.kernel.org/r/20250429-james-perf-fix-libperf-in-source-build-v1-1-a1a827ac15e5@linaro.org Signed-off-by: Namhyung Kim --- tools/lib/perf/Makefile | 6 +++--- tools/perf/Makefile.config | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 1a19b5013f45..7fbb50b74c00 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -42,7 +42,7 @@ libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) TEST_ARGS := $(if $(V),-v) INCLUDES = \ --I$(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi \ +-I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ @@ -100,7 +100,7 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -uapi-asm := $(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi/asm +uapi-asm := $(OUTPUT)arch/$(SRCARCH)/include/generated/uapi/asm ifeq ($(SRCARCH),arm64) syscall-y := $(uapi-asm)/unistd_64.h endif @@ -130,7 +130,7 @@ all: fixdep clean: $(LIBAPI)-clean $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS $(LIBPERF_PC) \ - $(TESTS_STATIC) $(TESTS_SHARED) + $(TESTS_STATIC) $(TESTS_SHARED) $(syscall-y) TESTS_IN = tests-in.o diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a52482654d4b..b7769a22fe1a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -29,7 +29,7 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated -CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi +CFLAGS += -I$(OUTPUT)libperf/arch/$(SRCARCH)/include/generated/uapi # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) From 8a558cbda51bef09773c72bf74a32047479110c7 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 4 Apr 2025 12:54:21 +0200 Subject: [PATCH 253/559] idpf: fix potential memory leak on kcalloc() failure In case of failing on rss_data->rss_key allocation the function is freeing vport without freeing earlier allocated q_vector_idxs. Fix it. Move from freeing in error branch to goto scheme. Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport") Reviewed-by: Pavan Kumar Linga Reviewed-by: Aleksandr Loktionov Suggested-by: Pavan Kumar Linga Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 730a9c7a59f2..82f09b4030bc 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1113,11 +1113,9 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, num_max_q = max(max_q->max_txq, max_q->max_rxq); vport->q_vector_idxs = kcalloc(num_max_q, sizeof(u16), GFP_KERNEL); - if (!vport->q_vector_idxs) { - kfree(vport); + if (!vport->q_vector_idxs) + goto free_vport; - return NULL; - } idpf_vport_init(vport, max_q); /* This alloc is done separate from the LUT because it's not strictly @@ -1127,11 +1125,9 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, */ rss_data = &adapter->vport_config[idx]->user_config.rss_data; rss_data->rss_key = kzalloc(rss_data->rss_key_size, GFP_KERNEL); - if (!rss_data->rss_key) { - kfree(vport); + if (!rss_data->rss_key) + goto free_vector_idxs; - return NULL; - } /* Initialize default rss key */ netdev_rss_key_fill((void *)rss_data->rss_key, rss_data->rss_key_size); @@ -1144,6 +1140,13 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, adapter->next_vport = idpf_get_free_slot(adapter); return vport; + +free_vector_idxs: + kfree(vport->q_vector_idxs); +free_vport: + kfree(vport); + + return NULL; } /** From ed375b182140eeb9c73609b17939c8a29b27489e Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Thu, 10 Apr 2025 13:52:23 +0200 Subject: [PATCH 254/559] idpf: protect shutdown from reset Before the referenced commit, the shutdown just called idpf_remove(), this way IDPF_REMOVE_IN_PROG was protecting us from the serv_task rescheduling reset. Without this flag set the shutdown process is vulnerable to HW reset or any other triggering conditions (such as default mailbox being destroyed). When one of conditions checked in idpf_service_task becomes true, vc_event_task can be rescheduled during shutdown, this leads to accessing freed memory e.g. idpf_req_rel_vector_indexes() trying to read vport->q_vector_idxs. This in turn causes the system to become defunct during e.g. systemctl kexec. Considering using IDPF_REMOVE_IN_PROG would lead to more heavy shutdown process, instead just cancel the serv_task before cancelling adapter->serv_task before cancelling adapter->vc_event_task to ensure that reset will not be scheduled while we are doing a shutdown. Fixes: 4c9106f4906a ("idpf: fix adapter NULL pointer dereference on reboot") Reviewed-by: Michal Swiatkowski Signed-off-by: Larysa Zaremba Reviewed-by: Simon Horman Reviewed-by: Emil Tantilov Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c index bec4a02c5373..b35713036a54 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_main.c +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -89,6 +89,7 @@ static void idpf_shutdown(struct pci_dev *pdev) { struct idpf_adapter *adapter = pci_get_drvdata(pdev); + cancel_delayed_work_sync(&adapter->serv_task); cancel_delayed_work_sync(&adapter->vc_event_task); idpf_vc_core_deinit(adapter); idpf_deinit_dflt_mbx(adapter); From c7d6cb96d5c33b5148f3dc76fcd30a9b8cd9e973 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 22 Apr 2025 14:03:09 -0700 Subject: [PATCH 255/559] igc: fix lock order in igc_ptp_reset Commit 1a931c4f5e68 ("igc: add lock preventing multiple simultaneous PTM transactions") added a new mutex to protect concurrent PTM transactions. This lock is acquired in igc_ptp_reset() in order to ensure the PTM registers are properly disabled after a device reset. The flow where the lock is acquired already holds a spinlock, so acquiring a mutex leads to a sleep-while-locking bug, reported both by smatch, and the kernel test robot. The critical section in igc_ptp_reset() does correctly use the readx_poll_timeout_atomic variants, but the standard PTM flow uses regular sleeping variants. This makes converting the mutex to a spinlock a bit tricky. Instead, re-order the locking in igc_ptp_reset. Acquire the mutex first, and then the tmreg_lock spinlock. This is safe because there is no other ordering dependency on these locks, as this is the only place where both locks were acquired simultaneously. Indeed, any other flow acquiring locks in that order would be wrong regardless. Signed-off-by: Jacob Keller Fixes: 1a931c4f5e68 ("igc: add lock preventing multiple simultaneous PTM transactions") Link: https://lore.kernel.org/intel-wired-lan/Z_-P-Hc1yxcw0lTB@stanley.mountain/ Link: https://lore.kernel.org/intel-wired-lan/202504211511.f7738f5d-lkp@intel.com/T/#u Reviewed-by: Przemek Kitszel Reviewed-by: Vitaly Lifshits Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 612ed26a29c5..efc7b30e4211 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -1290,6 +1290,8 @@ void igc_ptp_reset(struct igc_adapter *adapter) /* reset the tstamp_config */ igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); + mutex_lock(&adapter->ptm_lock); + spin_lock_irqsave(&adapter->tmreg_lock, flags); switch (adapter->hw.mac.type) { @@ -1308,7 +1310,6 @@ void igc_ptp_reset(struct igc_adapter *adapter) if (!igc_is_crosststamp_supported(adapter)) break; - mutex_lock(&adapter->ptm_lock); wr32(IGC_PCIE_DIG_DELAY, IGC_PCIE_DIG_DELAY_DEFAULT); wr32(IGC_PCIE_PHY_DELAY, IGC_PCIE_PHY_DELAY_DEFAULT); @@ -1332,7 +1333,6 @@ void igc_ptp_reset(struct igc_adapter *adapter) netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); igc_ptm_reset(hw); - mutex_unlock(&adapter->ptm_lock); break; default: /* No work to do. */ @@ -1349,5 +1349,7 @@ void igc_ptp_reset(struct igc_adapter *adapter) out: spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + mutex_unlock(&adapter->ptm_lock); + wrfl(); } From 6e0490fc36cdac696f96e57b61d93b9ae32e0f4c Mon Sep 17 00:00:00 2001 From: Chad Monroe Date: Sun, 27 Apr 2025 02:05:44 +0100 Subject: [PATCH 256/559] net: ethernet: mtk_eth_soc: fix SER panic with 4GB+ RAM If the mtk_poll_rx() function detects the MTK_RESETTING flag, it will jump to release_desc and refill the high word of the SDP on the 4GB RFB. Subsequently, mtk_rx_clean will process an incorrect SDP, leading to a panic. Add patch from MediaTek's SDK to resolve this. Fixes: 2d75891ebc09 ("net: ethernet: mtk_eth_soc: support 36-bit DMA addressing on MT7988") Link: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/71f47ea785699c6aa3b922d66c2bdc1a43da25b1 Signed-off-by: Chad Monroe Link: https://patch.msgid.link/4adc2aaeb0fb1b9cdc56bf21cf8e7fa328daa345.1745715843.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 83068925c589..8fda4ce80d81 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2248,14 +2248,18 @@ skip_rx: ring->data[idx] = new_data; rxd->rxd1 = (unsigned int)dma_addr; release_desc: + if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA)) { + if (unlikely(dma_addr == DMA_MAPPING_ERROR)) + addr64 = FIELD_GET(RX_DMA_ADDR64_MASK, + rxd->rxd2); + else + addr64 = RX_DMA_PREP_ADDR64(dma_addr); + } + if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) rxd->rxd2 = RX_DMA_LSO; else - rxd->rxd2 = RX_DMA_PREP_PLEN0(ring->buf_size); - - if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA) && - likely(dma_addr != DMA_MAPPING_ERROR)) - rxd->rxd2 |= RX_DMA_PREP_ADDR64(dma_addr); + rxd->rxd2 = RX_DMA_PREP_PLEN0(ring->buf_size) | addr64; ring->calc_idx = idx; done++; From 426d487bca38b34f39c483edfc6313a036446b33 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Apr 2025 17:48:55 +0300 Subject: [PATCH 257/559] net: dsa: felix: fix broken taprio gate states after clock jump Simplest setup to reproduce the issue: connect 2 ports of the LS1028A-RDB together (eno0 with swp0) and run: $ ip link set eno0 up && ip link set swp0 up $ tc qdisc replace dev swp0 parent root handle 100 taprio num_tc 8 \ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \ base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \ sched-entry S 20 300000 sched-entry S 48 200000 \ sched-entry S 20 300000 sched-entry S 83 200000 \ sched-entry S 40 300000 sched-entry S 00 200000 flags 2 $ ptp4l -i eno0 -f /etc/linuxptp/configs/gPTP.cfg -m & $ ptp4l -i swp0 -f /etc/linuxptp/configs/gPTP.cfg -m One will observe that the PTP state machine on swp0 starts synchronizing, then it attempts to do a clock step, and after that, it never fails to recover from the condition below. ptp4l[82.427]: selected best master clock 00049f.fffe.05f627 ptp4l[82.428]: port 1 (swp0): MASTER to UNCALIBRATED on RS_SLAVE ptp4l[83.252]: port 1 (swp0): UNCALIBRATED to SLAVE on MASTER_CLOCK_SELECTED ptp4l[83.886]: rms 4537731277 max 9075462553 freq -18518 +/- 11467 delay 818 +/- 0 ptp4l[84.170]: timed out while polling for tx timestamp ptp4l[84.171]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it ptp4l[84.172]: port 1 (swp0): send peer delay request failed ptp4l[84.173]: port 1 (swp0): clearing fault immediately ptp4l[84.269]: port 1 (swp0): SLAVE to LISTENING on INIT_COMPLETE ptp4l[85.303]: timed out while polling for tx timestamp ptp4l[84.171]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it ptp4l[84.172]: port 1 (swp0): send peer delay request failed ptp4l[84.173]: port 1 (swp0): clearing fault immediately ptp4l[84.269]: port 1 (swp0): SLAVE to LISTENING on INIT_COMPLETE ptp4l[85.303]: timed out while polling for tx timestamp ptp4l[85.304]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it ptp4l[85.305]: port 1 (swp0): send peer delay response failed ptp4l[85.306]: port 1 (swp0): clearing fault immediately ptp4l[86.304]: timed out while polling for tx timestamp A hint is given by the non-zero statistics for dropped packets which were expecting hardware TX timestamps: $ ethtool --include-statistics -T swp0 (...) Statistics: tx_pkts: 30 tx_lost: 11 tx_err: 0 We know that when PTP clock stepping takes place (from ocelot_ptp_settime64() or from ocelot_ptp_adjtime()), vsc9959_tas_clock_adjust() is called. Another interesting hint is that placing an early return in vsc9959_tas_clock_adjust(), so as to neutralize this function, fixes the issue and TX timestamps are no longer dropped. The debugging function written by me and included below is intended to read the GCL RAM, after the admin schedule became operational, through the two status registers available for this purpose: QSYS_GCL_STATUS_REG_1 and QSYS_GCL_STATUS_REG_2. static void vsc9959_print_tas_gcl(struct ocelot *ocelot) { u32 val, list_length, interval, gate_state; int i, err; err = read_poll_timeout(ocelot_read, val, !(val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING), 10, 100000, false, ocelot, QSYS_PARAM_STATUS_REG_8); if (err) { dev_err(ocelot->dev, "Failed to wait for TAS config pending bit to clear: %pe\n", ERR_PTR(err)); return; } val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_3); list_length = QSYS_PARAM_STATUS_REG_3_LIST_LENGTH_X(val); dev_info(ocelot->dev, "GCL length: %u\n", list_length); for (i = 0; i < list_length; i++) { ocelot_rmw(ocelot, QSYS_GCL_STATUS_REG_1_GCL_ENTRY_NUM(i), QSYS_GCL_STATUS_REG_1_GCL_ENTRY_NUM_M, QSYS_GCL_STATUS_REG_1); interval = ocelot_read(ocelot, QSYS_GCL_STATUS_REG_2); val = ocelot_read(ocelot, QSYS_GCL_STATUS_REG_1); gate_state = QSYS_GCL_STATUS_REG_1_GATE_STATE_X(val); dev_info(ocelot->dev, "GCL entry %d: states 0x%x interval %u\n", i, gate_state, interval); } } Calling it from two places: after the initial QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE performed by vsc9959_qos_port_tas_set(), and after the one done by vsc9959_tas_clock_adjust(), I notice the following difference. From the tc-taprio process context, where the schedule was initially configured, the GCL looks like this: mscc_felix 0000:00:00.5: GCL length: 8 mscc_felix 0000:00:00.5: GCL entry 0: states 0x20 interval 300000 mscc_felix 0000:00:00.5: GCL entry 1: states 0x10 interval 200000 mscc_felix 0000:00:00.5: GCL entry 2: states 0x20 interval 300000 mscc_felix 0000:00:00.5: GCL entry 3: states 0x48 interval 200000 mscc_felix 0000:00:00.5: GCL entry 4: states 0x20 interval 300000 mscc_felix 0000:00:00.5: GCL entry 5: states 0x83 interval 200000 mscc_felix 0000:00:00.5: GCL entry 6: states 0x40 interval 300000 mscc_felix 0000:00:00.5: GCL entry 7: states 0x0 interval 200000 But from the ptp4l clock stepping process context, when the vsc9959_tas_clock_adjust() hook is called, the GCL RAM of the operational schedule now looks like this: mscc_felix 0000:00:00.5: GCL length: 8 mscc_felix 0000:00:00.5: GCL entry 0: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 1: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 2: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 3: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 4: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 5: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 6: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 7: states 0x0 interval 0 I do not have a formal explanation, just experimental conclusions. It appears that after triggering QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE for a port's TAS, the GCL entry RAM is updated anyway, despite what the documentation claims: "Specify the time interval in QSYS::GCL_CFG_REG_2.TIME_INTERVAL. This triggers the actual RAM write with the gate state and the time interval for the entry number specified". We don't touch that register (through vsc9959_tas_gcl_set()) from vsc9959_tas_clock_adjust(), yet the GCL RAM is updated anyway. It seems to be updated with effectively stale memory, which in my testing can hold a variety of things, including even pieces of the previously applied schedule, for particular schedule lengths. As such, in most circumstances it is very difficult to pinpoint this issue, because the newly updated schedule would "behave strangely", but ultimately might still pass traffic to some extent, due to some gate entries still being present in the stale GCL entry RAM. It is easy to miss. With the particular schedule given at the beginning, the GCL RAM "happens" to be reproducibly rewritten with all zeroes, and this is consistent with what we see: when the time-aware shaper has gate entries with all gates closed, traffic is dropped on TX, no wonder we can't retrieve TX timestamps. Rewriting the GCL entry RAM when reapplying the new base time fixes the observed issue. Fixes: 8670dc33f48b ("net: dsa: felix: update base time of time-aware shaper when adjusting PTP time") Reported-by: Richie Pearn Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250426144859.3128352-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 940f1b71226d..7b35d24c38d7 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1543,7 +1543,7 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot) struct tc_taprio_qopt_offload *taprio; struct ocelot_port *ocelot_port; struct timespec64 base_ts; - int port; + int i, port; u32 val; mutex_lock(&ocelot->fwd_domain_lock); @@ -1575,6 +1575,9 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot) QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB_M, QSYS_PARAM_CFG_REG_3); + for (i = 0; i < taprio->num_entries; i++) + vsc9959_tas_gcl_set(ocelot, i, &taprio->entries[i]); + ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE, QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE, QSYS_TAS_PARAM_CFG_CTRL); From efa6eb7d77aaf5b05eed25c0ecbf7754cc325c83 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Apr 2025 17:48:56 +0300 Subject: [PATCH 258/559] selftests: net: tsn_lib: create common helper for counting received packets This snippet will be necessary for a future isochron-based test, so provide a simpler high-level interface for counting the received packets. Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250426144859.3128352-3-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ocelot/psfp.sh | 7 +------ tools/testing/selftests/net/forwarding/tsn_lib.sh | 11 +++++++++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh index bed748dde4b0..f96a4bc7120f 100755 --- a/tools/testing/selftests/drivers/net/ocelot/psfp.sh +++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh @@ -272,12 +272,7 @@ run_test() "" \ "${isochron_dat}" - # Count all received packets by looking at the non-zero RX timestamps - received=$(isochron report \ - --input-file "${isochron_dat}" \ - --printf-format "%u\n" --printf-args "R" | \ - grep -w -v '0' | wc -l) - + received=$(isochron_report_num_received "${isochron_dat}") if [ "${received}" = "${expected}" ]; then RET=0 else diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh index b91bcd8008a9..19da1ccceac8 100644 --- a/tools/testing/selftests/net/forwarding/tsn_lib.sh +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -247,3 +247,14 @@ isochron_do() cpufreq_restore ${ISOCHRON_CPU} } + +isochron_report_num_received() +{ + local isochron_dat=$1; shift + + # Count all received packets by looking at the non-zero RX timestamps + isochron report \ + --input-file "${isochron_dat}" \ + --printf-format "%u\n" --printf-args "R" | \ + grep -w -v '0' | wc -l +} From f52fe6efd61f54c5cb0e19ef1fde96cf23048a70 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Apr 2025 17:48:57 +0300 Subject: [PATCH 259/559] selftests: net: tsn_lib: add window_size argument to isochron_do() Make out-of-band testing (send a packet when its traffic class gate is closed, expecting it to be delayed) more predictable by allowing the window size to be customized by isochron_do(). From man isochron-send, the window size alters the advance time (the delta between the transmission time of the packet, and its expected TX time when using SO_TXTIME or tc-taprio on the sender). In absence of the argument, isochron-send defaults to maximizing the advance time (making it equal to the cycle length). The default behavior is exactly what is problematic. An advance time that is too large will make packets intended to be out-of-band still be potentially in-band with an open gate from the schedule's previous cycle. We need to allow that advance time to be reduced. Perhaps a bit confusingly, isochron_do() has a shift_time argument currently, but that does not help here. The shift time shifts both the user space wakeup time and the expected TX time by equal amounts, it is unable of bringing them closer to one another. Set the window size properly for the Ocelot PSFP selftest as well. That used to work due to a very carefully chosen SHIFT_TIME_NS. I've re-tested that the test still works properly. Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250426144859.3128352-4-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ocelot/psfp.sh | 1 + tools/testing/selftests/net/forwarding/tsn_lib.sh | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh index f96a4bc7120f..8972f42dfe03 100755 --- a/tools/testing/selftests/drivers/net/ocelot/psfp.sh +++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh @@ -266,6 +266,7 @@ run_test() "${base_time}" \ "${CYCLE_TIME_NS}" \ "${SHIFT_TIME_NS}" \ + "${GATE_DURATION_NS}" \ "${NUM_PKTS}" \ "${STREAM_VID}" \ "${STREAM_PRIO}" \ diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh index 19da1ccceac8..bcee7960a39f 100644 --- a/tools/testing/selftests/net/forwarding/tsn_lib.sh +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -182,6 +182,7 @@ isochron_do() local base_time=$1; shift local cycle_time=$1; shift local shift_time=$1; shift + local window_size=$1; shift local num_pkts=$1; shift local vid=$1; shift local priority=$1; shift @@ -212,6 +213,10 @@ isochron_do() extra_args="${extra_args} --shift-time=${shift_time}" fi + if ! [ -z "${window_size}" ]; then + extra_args="${extra_args} --window-size=${window_size}" + fi + if [ "${use_l2}" = "true" ]; then extra_args="${extra_args} --l2 --etype=0xdead ${vid}" receiver_extra_args="--l2 --etype=0xdead" From 4eb9da050f005fbbb7d301e8e99cfdb6e4771a0d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Apr 2025 17:48:58 +0300 Subject: [PATCH 260/559] selftests: net: tc_taprio: new test Add a forwarding path test for tc-taprio, based on isochron. This is specifically intended for NICs with an offloaded data path (switchdev/DSA) and requires taprio 'flags 2'. Also, $h1 and $h2 must support hardware timestamping, and $h1 tc-etf offload, for isochron to work. Packets received by a switch while the egress port has a taprio schedule with an open gate for the traffic class must be sent right away. Packets received by the switch while the traffic class gate must be delayed until it opens. Packets received by the switch must be dropped if the gate for the traffic class never opens. Packets should pass if the maximum SDU for the traffic class allows it, and should be dropped otherwise. The schedule should auto-update itself if clock jumps take place while taprio is installed. Repeat most of the above tests after forcing two clock jumps, one backwards (in Jan 1970) and one back into the present. Symlink it from tools/testing/selftests/drivers/net/dsa, because usually DSA ports have the same MAC address, and we need STABLE_MAC_ADDRS=yes from its forwarding.config for the test to run successfully. Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250426144859.3128352-5-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/dsa/tc_taprio.sh | 1 + .../selftests/net/forwarding/tc_taprio.sh | 421 ++++++++++++++++++ .../selftests/net/forwarding/tsn_lib.sh | 10 + 3 files changed, 432 insertions(+) create mode 120000 tools/testing/selftests/drivers/net/dsa/tc_taprio.sh create mode 100755 tools/testing/selftests/net/forwarding/tc_taprio.sh diff --git a/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh new file mode 120000 index 000000000000..d16a65e7595d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh \ No newline at end of file diff --git a/tools/testing/selftests/net/forwarding/tc_taprio.sh b/tools/testing/selftests/net/forwarding/tc_taprio.sh new file mode 100755 index 000000000000..8992aeabfe0b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_taprio.sh @@ -0,0 +1,421 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" \ + test_clock_jump_backward \ + test_taprio_after_ptp \ + test_max_sdu \ + test_clock_jump_backward_forward \ +" +NUM_NETIFS=4 +source tc_common.sh +source lib.sh +source tsn_lib.sh + +require_command python3 + +# The test assumes the usual topology from the README, where h1 is connected to +# swp1, h2 to swp2, and swp1 and swp2 are together in a bridge. +# Additional assumption: h1 and h2 use the same PHC, and so do swp1 and swp2. +# By synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized to +# swp1 (and both to CLOCK_REALTIME). +h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +UDS_ADDRESS_H1="/var/run/ptp4l_h1" +UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1" + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +# Tunables +NUM_PKTS=100 +STREAM_VID=10 +STREAM_PRIO_1=6 +STREAM_PRIO_2=5 +STREAM_PRIO_3=4 +# PTP uses TC 0 +ALL_GATES=$((1 << 0 | 1 << STREAM_PRIO_1 | 1 << STREAM_PRIO_2)) +# Use a conservative cycle of 10 ms to allow the test to still pass when the +# kernel has some extra overhead like lockdep etc +CYCLE_TIME_NS=10000000 +# Create two Gate Control List entries, one OPEN and one CLOSE, of equal +# durations +GATE_DURATION_NS=$((CYCLE_TIME_NS / 2)) +# Give 2/3 of the cycle time to user space and 1/3 to the kernel +FUDGE_FACTOR=$((CYCLE_TIME_NS / 3)) +# Shift the isochron base time by half the gate time, so that packets are +# always received by swp1 close to the middle of the time slot, to minimize +# inaccuracies due to network sync +SHIFT_TIME_NS=$((GATE_DURATION_NS / 2)) + +path_delay= + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +switch_create() +{ + local h2_mac_addr=$(mac_get $h2) + + ip link set $swp1 up + ip link set $swp2 up + + ip link add br0 type bridge vlan_filtering 1 + ip link set $swp1 master br0 + ip link set $swp2 master br0 + ip link set br0 up + + bridge vlan add dev $swp2 vid $STREAM_VID + bridge vlan add dev $swp1 vid $STREAM_VID + bridge fdb add dev $swp2 \ + $h2_mac_addr vlan $STREAM_VID static master +} + +switch_destroy() +{ + ip link del br0 +} + +ptp_setup() +{ + # Set up swp1 as a master PHC for h1, synchronized to the local + # CLOCK_REALTIME. + phc2sys_start $UDS_ADDRESS_SWP1 + ptp4l_start $h1 true $UDS_ADDRESS_H1 + ptp4l_start $swp1 false $UDS_ADDRESS_SWP1 +} + +ptp_cleanup() +{ + ptp4l_stop $swp1 + ptp4l_stop $h1 + phc2sys_stop +} + +txtime_setup() +{ + local if_name=$1 + + tc qdisc add dev $if_name clsact + # Classify PTP on TC 7 and isochron on TC 6 + tc filter add dev $if_name egress protocol 0x88f7 \ + flower action skbedit priority 7 + tc filter add dev $if_name egress protocol 802.1Q \ + flower vlan_ethtype 0xdead action skbedit priority 6 + tc qdisc add dev $if_name handle 100: parent root mqprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + hw 1 + # Set up TC 5, 6, 7 for SO_TXTIME. tc-mqprio queues count from 1. + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_1 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_2 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_3 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR +} + +txtime_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name clsact + tc qdisc del dev $if_name root +} + +taprio_replace() +{ + local if_name="$1"; shift + local extra_args="$1"; shift + + # STREAM_PRIO_1 always has an open gate. + # STREAM_PRIO_2 has a gate open for GATE_DURATION_NS (half the cycle time) + # STREAM_PRIO_3 always has a closed gate. + tc qdisc replace dev $if_name root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + sched-entry S $(printf "%x" $ALL_GATES) $GATE_DURATION_NS \ + sched-entry S $(printf "%x" $((ALL_GATES & ~(1 << STREAM_PRIO_2)))) $GATE_DURATION_NS \ + base-time 0 flags 0x2 $extra_args + taprio_wait_for_admin $if_name +} + +taprio_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name root +} + +probe_path_delay() +{ + local isochron_dat="$(mktemp)" + local received + + log_info "Probing path delay" + + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" 0 \ + "$CYCLE_TIME_NS" "" "" "$NUM_PKTS" \ + "$STREAM_VID" "$STREAM_PRIO_1" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + if [ "$received" != "$NUM_PKTS" ]; then + echo "Cannot establish basic data path between $h1 and $h2" + exit $ksft_fail + fi + + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(np.max(w))) + EOF + path_delay=$(python3 ./isochron_postprocess.py) + + log_info "Path delay from $h1 to $h2 estimated at $path_delay ns" + + if [ "$path_delay" -gt "$GATE_DURATION_NS" ]; then + echo "Path delay larger than gate duration, aborting" + exit $ksft_fail + fi + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + switch_create + + txtime_setup $h1 + + # Temporarily set up PTP just to probe the end-to-end path delay. + ptp_setup + probe_path_delay + ptp_cleanup +} + +cleanup() +{ + pre_cleanup + + isochron_recv_stop + txtime_cleanup $h1 + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +run_test() +{ + local base_time=$1; shift + local stream_prio=$1; shift + local expected_delay=$1; shift + local should_fail=$1; shift + local test_name=$1; shift + local isochron_dat="$(mktemp)" + local received + local median_delay + + RET=0 + + # Set the shift time equal to the cycle time, which effectively + # cancels the default advance time. Packets won't be sent early in + # software, which ensures that they won't prematurely enter through + # the open gate in __test_out_of_band(). Also, the gate is open for + # long enough that this won't cause a problem in __test_in_band(). + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" "$base_time" \ + "$CYCLE_TIME_NS" "$SHIFT_TIME_NS" "$GATE_DURATION_NS" \ + "$NUM_PKTS" "$STREAM_VID" "$stream_prio" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + [ "$received" = "$NUM_PKTS" ] + check_err_fail $should_fail $? "Reception of $NUM_PKTS packets" + + if [ $should_fail = 0 ] && [ "$received" = "$NUM_PKTS" ]; then + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(int(np.median(w)))) + EOF + median_delay=$(python3 ./isochron_postprocess.py) + + # If the condition below is true, packets were delayed by a closed gate + [ "$median_delay" -gt $((path_delay + expected_delay)) ] + check_fail $? "Median delay $median_delay is greater than expected delay $expected_delay plus path delay $path_delay" + + # If the condition below is true, packets were sent expecting them to + # hit a closed gate in the switch, but were not delayed + [ "$expected_delay" -gt 0 ] && [ "$median_delay" -lt "$expected_delay" ] + check_fail $? "Median delay $median_delay is less than expected delay $expected_delay" + fi + + log_test "$test_name" + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +__test_always_open() +{ + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Gate always open" +} + +__test_always_closed() +{ + run_test 0.000000000 $STREAM_PRIO_3 0 1 "Gate always closed" +} + +__test_in_band() +{ + # Send packets in-band with the OPEN gate entry + run_test 0.000000000 $STREAM_PRIO_2 0 0 "In band with gate" +} + +__test_out_of_band() +{ + # Send packets in-band with the CLOSE gate entry + run_test 0.005000000 $STREAM_PRIO_2 \ + $((GATE_DURATION_NS - SHIFT_TIME_NS)) 0 \ + "Out of band with gate" +} + +run_subtests() +{ + __test_always_open + __test_always_closed + __test_in_band + __test_out_of_band +} + +test_taprio_after_ptp() +{ + log_info "Setting up taprio after PTP" + ptp_setup + taprio_replace $swp2 + run_subtests + taprio_cleanup $swp2 + ptp_cleanup +} + +__test_under_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 100 0" + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Under maximum SDU" +} + +__test_over_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 20 0" + run_test 0.000000000 $STREAM_PRIO_1 0 1 "Over maximum SDU" +} + +test_max_sdu() +{ + ptp_setup + __test_under_max_sdu + __test_over_max_sdu + taprio_cleanup $swp2 + ptp_cleanup +} + +# Perform a clock jump in the past without synchronization running, so that the +# time base remains where it was set by phc_ctl. +test_clock_jump_backward() +{ + # This is a more complex schedule specifically crafted in a way that + # has been problematic on NXP LS1028A. Not much to test with it other + # than the fact that it passes traffic. + tc qdisc replace dev $swp2 root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \ + base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \ + sched-entry S 20 300000 sched-entry S 48 200000 \ + sched-entry S 20 300000 sched-entry S 83 200000 \ + sched-entry S 40 300000 sched-entry S 00 200000 flags 2 + + log_info "Forcing a backward clock jump" + phc_ctl $swp1 set 0 + + ping_test $h1 192.0.2.2 + taprio_cleanup $swp2 +} + +# Test that taprio tolerates clock jumps. +# Since ptp4l and phc2sys are running, it is expected for the time to +# eventually recover (through yet another clock jump). Isochron waits +# until that is the case. +test_clock_jump_backward_forward() +{ + log_info "Forcing a backward and a forward clock jump" + taprio_replace $swp2 + phc_ctl $swp1 set 0 + ptp_setup + ping_test $h1 192.0.2.2 + run_subtests + ptp_cleanup + taprio_cleanup $swp2 +} + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_test_skip "Could not test offloaded functionality" + exit $EXIT_STATUS +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh index bcee7960a39f..08c044ff6689 100644 --- a/tools/testing/selftests/net/forwarding/tsn_lib.sh +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -2,6 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright 2021-2022 NXP +tc_testing_scripts_dir=$(dirname $0)/../../tc-testing/scripts + REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes} REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes} @@ -18,6 +20,7 @@ fi if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then require_command phc2sys require_command ptp4l + require_command phc_ctl fi phc2sys_start() @@ -263,3 +266,10 @@ isochron_report_num_received() --printf-format "%u\n" --printf-args "R" | \ grep -w -v '0' | wc -l } + +taprio_wait_for_admin() +{ + local if_name="$1"; shift + + "$tc_testing_scripts_dir/taprio_wait_for_admin.sh" "$(which tc)" "$if_name" +} From b936a9b8d4a585ccb6d454921c36286bfe63e01d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 26 Apr 2025 17:32:09 +0200 Subject: [PATCH 261/559] net: ipv6: fix UDPv6 GSO segmentation with NAT If any address or port is changed, update it in all packets and recalculate checksum. Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Signed-off-by: Felix Fietkau Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250426153210.14044-1-nbd@nbd.name Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 61 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 2c0725583be3..9a8142ccbabe 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -247,6 +247,62 @@ static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs) return segs; } +static void __udpv6_gso_segment_csum(struct sk_buff *seg, + struct in6_addr *oldip, + const struct in6_addr *newip, + __be16 *oldport, __be16 newport) +{ + struct udphdr *uh = udp_hdr(seg); + + if (ipv6_addr_equal(oldip, newip) && *oldport == newport) + return; + + if (uh->check) { + inet_proto_csum_replace16(&uh->check, seg, oldip->s6_addr32, + newip->s6_addr32, true); + + inet_proto_csum_replace2(&uh->check, seg, *oldport, newport, + false); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + + *oldip = *newip; + *oldport = newport; +} + +static struct sk_buff *__udpv6_gso_segment_list_csum(struct sk_buff *segs) +{ + const struct ipv6hdr *iph; + const struct udphdr *uh; + struct ipv6hdr *iph2; + struct sk_buff *seg; + struct udphdr *uh2; + + seg = segs; + uh = udp_hdr(seg); + iph = ipv6_hdr(seg); + uh2 = udp_hdr(seg->next); + iph2 = ipv6_hdr(seg->next); + + if (!(*(const u32 *)&uh->source ^ *(const u32 *)&uh2->source) && + ipv6_addr_equal(&iph->saddr, &iph2->saddr) && + ipv6_addr_equal(&iph->daddr, &iph2->daddr)) + return segs; + + while ((seg = seg->next)) { + uh2 = udp_hdr(seg); + iph2 = ipv6_hdr(seg); + + __udpv6_gso_segment_csum(seg, &iph2->saddr, &iph->saddr, + &uh2->source, uh->source); + __udpv6_gso_segment_csum(seg, &iph2->daddr, &iph->daddr, + &uh2->dest, uh->dest); + } + + return segs; +} + static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, netdev_features_t features, bool is_ipv6) @@ -259,7 +315,10 @@ static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss); - return is_ipv6 ? skb : __udpv4_gso_segment_list_csum(skb); + if (is_ipv6) + return __udpv6_gso_segment_list_csum(skb); + else + return __udpv4_gso_segment_list_csum(skb); } struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, From 95b2536137eeb66f20947e0fb0d0c100c8d6a140 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 29 Apr 2025 09:35:19 +0200 Subject: [PATCH 262/559] ASoC: Intel: catpt: avoid type mismatch in dev_dbg() format Depending on the architecture __ffs() returns either an 'unsigned long' or 'unsigned int' result. Compile-testing this driver on targets that use the latter produces a warning: sound/soc/intel/catpt/dsp.c: In function 'catpt_dsp_set_srampge': sound/soc/intel/catpt/dsp.c:181:44: error: format '%ld' expects argument of type 'long int', but argument 4 has type 'u32' {aka 'unsigned int'} [-Werror=format=] 181 | dev_dbg(cdev->dev, "sanitize block %ld: off 0x%08x\n", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Change the type of the local variable to match the format string and avoid the warning on any architecture. Signed-off-by: Arnd Bergmann Acked-by: Cezary Rojewski Link: https://patch.msgid.link/20250429073545.3558494-1-arnd@kernel.org Signed-off-by: Mark Brown --- sound/soc/intel/catpt/dsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/catpt/dsp.c b/sound/soc/intel/catpt/dsp.c index 5993819cc58a..008a20a2acbd 100644 --- a/sound/soc/intel/catpt/dsp.c +++ b/sound/soc/intel/catpt/dsp.c @@ -156,7 +156,7 @@ static void catpt_dsp_set_srampge(struct catpt_dev *cdev, struct resource *sram, { unsigned long old; u32 off = sram->start; - u32 b = __ffs(mask); + unsigned long b = __ffs(mask); old = catpt_readl_pci(cdev, VDRTCTL0) & mask; dev_dbg(cdev->dev, "SRAMPGE [0x%08lx] 0x%08lx -> 0x%08lx", From 36fd6275818e93d5bc44140d546bf2a45e88feee Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Mon, 28 Apr 2025 09:30:55 +0200 Subject: [PATCH 263/559] spi: spi-qpic-snand: fix NAND_READ_LOCATION_2 register handling The precomputed value for the NAND_READ_LOCATION_2 register should be stored in 'snandc->regs->read_location2'. Fix the qcom_spi_set_read_loc_first() function accordingly. Fixes: 7304d1909080 ("spi: spi-qpic: add driver for QCOM SPI NAND flash Interface") Signed-off-by: Gabor Juhos Reviewed-by: Md Sadre Alam Link: https://patch.msgid.link/20250428-qpic-snand-readloc2-fix-v1-1-50ce0877ff72@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-qpic-snand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index ae32c452d0bc..94948c8781e8 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -142,7 +142,7 @@ static void qcom_spi_set_read_loc_first(struct qcom_nand_controller *snandc, else if (reg == NAND_READ_LOCATION_1) snandc->regs->read_location1 = locreg_val; else if (reg == NAND_READ_LOCATION_2) - snandc->regs->read_location1 = locreg_val; + snandc->regs->read_location2 = locreg_val; else if (reg == NAND_READ_LOCATION_3) snandc->regs->read_location3 = locreg_val; } From 9fcd53c3206618166facf03ee3d465f66a107e01 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Tue, 29 Apr 2025 07:50:56 +0000 Subject: [PATCH 264/559] erofs: remove unused enum type Opt_err is not used in EROFS, we can remove it. Signed-off-by: Hongbo Li Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20250429075056.689570-1-lihongbo22@huawei.com Signed-off-by: Gao Xiang --- fs/erofs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index cadec6b1b554..da6ee7c39290 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -357,7 +357,6 @@ static void erofs_default_options(struct erofs_sb_info *sbi) enum { Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum, Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, - Opt_err }; static const struct constant_table erofs_param_cache_strategy[] = { From 4d5b71b487291da9f92e352c0a7e39f256d60db8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Apr 2025 07:31:41 +0200 Subject: [PATCH 265/559] ALSA: hda/realtek: Fix built-mic regression on other ASUS models A few ASUS models use the ALC256_FIXUP_ASUS_HEADSET_MODE although they have no built-in mic pin on NID 0x13, as found in the commit c1732ede5e80 ("ALSA: hda/realtek - Fix headset and mic on several Asus laptops with ALC256"). This was relatively harmless in the past as NID 0x13 was assigned as the secondary mic. But since the fix for the pin sort order, this pin became the primary one, hence user started noticing the broken input, and we've fixed already for a few ASUS models to switch to ALC256_FIXUP_ASUS_MIC_NO_PRESENCE. This patch corrects the other ASUS models to use the right quirk entry for fixing the built-in mic regression. Here we cover X541SA (1043:12e0), X541UV (1043:12f0), Z550SA (1043:13bf0) and X555UB (1043:1ccd). Fixes: 3b4309546b48 ("ALSA: hda: Fix headset detection failure due to unstable sort") Link: https://bugzilla.kernel.org/show_bug.cgi?id=220058 Link: https://patch.msgid.link/20250430053210.31776-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1799203af35a..7810d5f9b5aa 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10910,10 +10910,10 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x12a3, "Asus N7691ZM", ALC269_FIXUP_ASUS_N7601ZM), SND_PCI_QUIRK(0x1043, 0x12af, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x12b4, "ASUS B3405CCA / P3405CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC), - SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), SND_PCI_QUIRK(0x1043, 0x1433, "ASUS GX650PY/PZ/PV/PU/PYV/PZV/PIV/PVV", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1460, "Asus VivoBook 15", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -10967,7 +10967,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS), SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JU/JV/JI", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JY/JZ/JI/JG", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), - SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1ccf, "ASUS G814JU/JV/JI", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1cdf, "ASUS G814JY/JZ/JG", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1cef, "ASUS G834JY/JZ/JI/JG", ALC285_FIXUP_ASUS_HEADSET_MIC), From c4eb2f88d2796ab90c5430e11c48709716181364 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Fri, 25 Apr 2025 11:28:22 +0200 Subject: [PATCH 266/559] accel/ivpu: Increase state dump msg timeout Increase JMS message state dump command timeout to 100 ms. On some platforms, the FW may take a bit longer than 50 ms to dump its state to the log buffer and we don't want to miss any debug info during TDR. Fixes: 5e162f872d7a ("accel/ivpu: Add FW state dump on TDR") Cc: stable@vger.kernel.org # v6.13+ Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250425092822.2194465-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c index ec9a3629da3a..633160470c93 100644 --- a/drivers/accel/ivpu/ivpu_hw.c +++ b/drivers/accel/ivpu/ivpu_hw.c @@ -119,7 +119,7 @@ static void timeouts_init(struct ivpu_device *vdev) else vdev->timeout.autosuspend = 100; vdev->timeout.d0i3_entry_msg = 5; - vdev->timeout.state_dump_msg = 10; + vdev->timeout.state_dump_msg = 100; } } From f2ecc700d1ef53127c14a3463442a8ac2a6a5cf5 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Fri, 25 Apr 2025 11:33:40 +0200 Subject: [PATCH 267/559] accel/ivpu: Fix pm related deadlocks in cmdq ioctls Fix deadlocks in ivpu_cmdq_create_ioctl() and ivpu_cmdq_destroy_ioctl() related to runtime suspend. Runtime suspend acquires file_priv->lock mutex by calling ivpu_cmdq_reset_all_contexts(). The same lock is acquired in the cmdq ioctls. If one of the cmdq ioctls is called while runtime suspend is in progress, it can lead to a deadlock. Call stacks from example deadlock below. Runtime suspend thread: [ 3443.179717] Call Trace: [ 3443.179724] __schedule+0x4b6/0x16b0 [ 3443.179732] ? __mod_timer+0x27d/0x3a0 [ 3443.179738] schedule+0x2f/0x140 [ 3443.179741] schedule_preempt_disabled+0x19/0x30 [ 3443.179743] __mutex_lock.constprop.0+0x335/0x7d0 [ 3443.179745] ? xas_find+0x1ed/0x260 [ 3443.179747] ? xa_find+0x8e/0xf0 [ 3443.179749] __mutex_lock_slowpath+0x13/0x20 [ 3443.179751] mutex_lock+0x41/0x60 [ 3443.179757] ivpu_cmdq_reset_all_contexts+0x82/0x150 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.179786] ivpu_pm_runtime_suspend_cb+0x1f1/0x3f0 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.179850] pci_pm_runtime_suspend+0x6e/0x1f0 [ 3443.179870] ? __pfx_pci_pm_runtime_suspend+0x10/0x10 [ 3443.179886] __rpm_callback+0x48/0x130 [ 3443.179899] rpm_callback+0x64/0x70 [ 3443.179911] rpm_suspend+0x12c/0x630 [ 3443.179922] ? __schedule+0x4be/0x16b0 [ 3443.179941] pm_runtime_work+0xca/0xf0 [ 3443.179955] process_one_work+0x188/0x3d0 [ 3443.179971] worker_thread+0x2b9/0x3c0 [ 3443.179984] kthread+0xfb/0x220 [ 3443.180001] ? __pfx_worker_thread+0x10/0x10 [ 3443.180013] ? __pfx_kthread+0x10/0x10 [ 3443.180029] ret_from_fork+0x47/0x70 [ 3443.180044] ? __pfx_kthread+0x10/0x10 [ 3443.180059] ret_from_fork_asm+0x1a/0x30 User space thread: [ 3443.180128] Call Trace: [ 3443.180138] __schedule+0x4b6/0x16b0 [ 3443.180159] schedule+0x2f/0x140 [ 3443.180163] rpm_resume+0x1a7/0x6a0 [ 3443.180165] ? __pfx_autoremove_wake_function+0x10/0x10 [ 3443.180169] __pm_runtime_resume+0x56/0x90 [ 3443.180171] ivpu_rpm_get+0x28/0xb0 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180181] ivpu_ipc_send_receive+0x6d/0x120 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180193] ? free_frozen_pages+0x395/0x670 [ 3443.180199] ? __free_pages+0xa7/0xc0 [ 3443.180202] ivpu_jsm_hws_destroy_cmdq+0x76/0xf0 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180213] ? locks_dispose_list+0x6c/0xa0 [ 3443.180219] ? kmem_cache_free+0x342/0x470 [ 3443.180222] ? vm_area_free+0x19/0x30 [ 3443.180225] ? xas_load+0x17/0xf0 [ 3443.180229] ? xa_load+0x72/0xb0 [ 3443.180230] ivpu_cmdq_unregister.isra.0+0xb1/0x100 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180241] ivpu_cmdq_destroy_ioctl+0x8d/0x130 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180251] ? __pfx_ivpu_cmdq_destroy_ioctl+0x10/0x10 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180260] drm_ioctl_kernel+0xb3/0x110 [ 3443.180265] drm_ioctl+0x2ca/0x580 [ 3443.180266] ? __pfx_ivpu_cmdq_destroy_ioctl+0x10/0x10 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180275] ? __fput+0x1ae/0x2f0 [ 3443.180279] ? kmem_cache_free+0x342/0x470 [ 3443.180282] __x64_sys_ioctl+0xa9/0xe0 [ 3443.180286] x64_sys_call+0x13b7/0x26f0 [ 3443.180289] do_syscall_64+0x62/0x180 [ 3443.180291] entry_SYSCALL_64_after_hwframe+0x71/0x79 Fixes: 465a3914b254 ("accel/ivpu: Add API for command queue create/destroy/submit") Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250425093341.2202895-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 863e3cd6ace5..e17b3deda201 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -874,15 +874,21 @@ int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file * int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; struct drm_ivpu_cmdq_create *args = data; struct ivpu_cmdq *cmdq; + int ret; - if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) + if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) return -ENODEV; if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->lock); cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), false); @@ -891,6 +897,8 @@ int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file * mutex_unlock(&file_priv->lock); + ivpu_rpm_put(vdev); + return cmdq ? 0 : -ENOMEM; } @@ -900,28 +908,35 @@ int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file struct ivpu_device *vdev = file_priv->vdev; struct drm_ivpu_cmdq_destroy *args = data; struct ivpu_cmdq *cmdq; - u32 cmdq_id; + u32 cmdq_id = 0; int ret; if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) return -ENODEV; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->lock); cmdq = xa_load(&file_priv->cmdq_xa, args->cmdq_id); if (!cmdq || cmdq->is_legacy) { ret = -ENOENT; - goto err_unlock; + } else { + cmdq_id = cmdq->id; + ivpu_cmdq_destroy(file_priv, cmdq); + ret = 0; } - cmdq_id = cmdq->id; - ivpu_cmdq_destroy(file_priv, cmdq); mutex_unlock(&file_priv->lock); - ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id); - return 0; -err_unlock: - mutex_unlock(&file_priv->lock); + /* Abort any pending jobs only if cmdq was destroyed */ + if (!ret) + ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id); + + ivpu_rpm_put(vdev); + return ret; } From 75680b7cd461b169c7ccd2a0fba7542868b7fce2 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Fri, 25 Apr 2025 11:36:56 +0200 Subject: [PATCH 268/559] accel/ivpu: Correct mutex unlock order in job submission The mutex unlock for vdev->submitted_jobs_lock was incorrectly placed before unlocking file_priv->lock. Change order of unlocks to avoid potential race conditions. Fixes: 5bbccadaf33e ("accel/ivpu: Abort all jobs after command queue unregister") Signed-off-by: Karol Wachowski Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250425093656.2228168-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index e17b3deda201..b28da35c30b6 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -681,8 +681,8 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id) err_erase_xa: xa_erase(&vdev->submitted_jobs_xa, job->job_id); err_unlock: - mutex_unlock(&vdev->submitted_jobs_lock); mutex_unlock(&file_priv->lock); + mutex_unlock(&vdev->submitted_jobs_lock); ivpu_rpm_put(vdev); return ret; } From 58f6217e5d0132a9f14e401e62796916aa055c1b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Apr 2025 17:13:55 -0700 Subject: [PATCH 269/559] perf/x86/intel: KVM: Mask PEBS_ENABLE loaded for guest with vCPU's value. When generating the MSR_IA32_PEBS_ENABLE value that will be loaded on VM-Entry to a KVM guest, mask the value with the vCPU's desired PEBS_ENABLE value. Consulting only the host kernel's host vs. guest masks results in running the guest with PEBS enabled even when the guest doesn't want to use PEBS. Because KVM uses perf events to proxy the guest virtual PMU, simply looking at exclude_host can't differentiate between events created by host userspace, and events created by KVM on behalf of the guest. Running the guest with PEBS unexpectedly enabled typically manifests as crashes due to a near-infinite stream of #PFs. E.g. if the guest hasn't written MSR_IA32_DS_AREA, the CPU will hit page faults on address '0' when trying to record PEBS events. The issue is most easily reproduced by running `perf kvm top` from before commit 7b100989b4f6 ("perf evlist: Remove __evlist__add_default") (after which, `perf kvm top` effectively stopped using PEBS). The userspace side of perf creates a guest-only PEBS event, which intel_guest_get_msrs() misconstrues a guest-*owned* PEBS event. Arguably, this is a userspace bug, as enabling PEBS on guest-only events simply cannot work, and userspace can kill VMs in many other ways (there is no danger to the host). However, even if this is considered to be bad userspace behavior, there's zero downside to perf/KVM restricting PEBS to guest-owned events. Note, commit 854250329c02 ("KVM: x86/pmu: Disable guest PEBS temporarily in two rare situations") fixed the case where host userspace is profiling KVM *and* userspace, but missed the case where userspace is profiling only KVM. Fixes: c59a1f106f5c ("KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS") Closes: https://lore.kernel.org/all/Z_VUswFkWiTYI0eD@do-x1carbon Reported-by: Seth Forshee Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dapeng Mi Tested-by: "Seth Forshee (DigitalOcean)" Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250426001355.1026530-1-seanjc@google.com --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 00dfe487bd00..c5f385413392 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4395,7 +4395,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) arr[pebs_enable] = (struct perf_guest_switch_msr){ .msr = MSR_IA32_PEBS_ENABLE, .host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask, - .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask, + .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable, }; if (arr[pebs_enable].host) { From 9ab7a709c926c16b4433cf02d04fcbcf35aaab2b Mon Sep 17 00:00:00 2001 From: Shravya KN Date: Mon, 28 Apr 2025 15:58:56 -0700 Subject: [PATCH 270/559] bnxt_en: Fix error handling path in bnxt_init_chip() WARN_ON() is triggered in __flush_work() if bnxt_init_chip() fails because we call cancel_work_sync() on dim work that has not been initialized. WARNING: CPU: 37 PID: 5223 at kernel/workqueue.c:4201 __flush_work.isra.0+0x212/0x230 The driver relies on the BNXT_STATE_NAPI_DISABLED bit to check if dim work has already been cancelled. But in the bnxt_open() path, BNXT_STATE_NAPI_DISABLED is not set and this causes the error path to think that it needs to cancel the uninitalized dim work. Fix it by setting BNXT_STATE_NAPI_DISABLED during initialization. The bit will be cleared when we enable NAPI and initialize dim work. Fixes: 40452969a506 ("bnxt_en: Fix DIM shutdown") Suggested-by: Somnath Kotur Reviewed-by: Somnath Kotur Signed-off-by: Shravya KN Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2c8e2c19d854..c4bccc683597 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11602,6 +11602,9 @@ static void bnxt_init_napi(struct bnxt *bp) poll_fn = bnxt_poll_p5; else if (BNXT_CHIP_TYPE_NITRO_A0(bp)) cp_nr_rings--; + + set_bit(BNXT_STATE_NAPI_DISABLED, &bp->state); + for (i = 0; i < cp_nr_rings; i++) { bnapi = bp->bnapi[i]; netif_napi_add_config_locked(bp->dev, &bnapi->napi, poll_fn, From 8e6cc9045380f3f0c48ebda2bda5e1abe263388d Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 28 Apr 2025 15:58:57 -0700 Subject: [PATCH 271/559] bnxt_en: Fix ethtool selftest output in one of the failure cases When RDMA driver is loaded, running offline self test is not supported and driver returns failure early. But it is not clearing the input buffer and hence the application prints some junk characters for individual test results. Fix it by clearing the buffer before returning. Fixes: 895621f1c816 ("bnxt_en: Don't support offline self test when RoCE driver is loaded") Reviewed-by: Somnath Kotur Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 48dd5922e4dd..7be37976f3e4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -4991,6 +4991,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, if (!bp->num_tests || !BNXT_PF(bp)) return; + memset(buf, 0, sizeof(u64) * bp->num_tests); if (etest->flags & ETH_TEST_FL_OFFLINE && bnxt_ulp_registered(bp->edev)) { etest->flags |= ETH_TEST_FL_FAILED; @@ -4998,7 +4999,6 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, return; } - memset(buf, 0, sizeof(u64) * bp->num_tests); if (!netif_running(dev)) { etest->flags |= ETH_TEST_FL_FAILED; return; From a63db07e4ecd45b027718168faf7d798bb47bf58 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Mon, 28 Apr 2025 15:58:58 -0700 Subject: [PATCH 272/559] bnxt_en: Add missing skb_mark_for_recycle() in bnxt_rx_vlan() If bnxt_rx_vlan() fails because the VLAN protocol ID is invalid, the SKB is freed but we're missing the call to recycle it. This may cause the warning: "page_pool_release_retry() stalled pool shutdown" Add the missing skb_mark_for_recycle() in bnxt_rx_vlan(). Fixes: 86b05508f775 ("bnxt_en: Use the unified RX page pool buffers for XDP and non-XDP") Reviewed-by: Kalesh AP Reviewed-by: Pavan Chebbi Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c4bccc683597..cfc9ccab39bf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2015,6 +2015,7 @@ static struct sk_buff *bnxt_rx_vlan(struct sk_buff *skb, u8 cmp_type, } return skb; vlan_err: + skb_mark_for_recycle(skb); dev_kfree_skb(skb); return NULL; } From 1ae04e489dd757e1e61999362f33e7c554c3b9e3 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Mon, 28 Apr 2025 15:58:59 -0700 Subject: [PATCH 273/559] bnxt_en: call pci_alloc_irq_vectors() after bnxt_reserve_rings() On some architectures (e.g. ARM), calling pci_alloc_irq_vectors() will immediately cause the MSIX table to be written. This will not work if we haven't called bnxt_reserve_rings() to properly map the MSIX table to the MSIX vectors reserved by FW. Fix the FW error recovery path to delay the bnxt_init_int_mode() -> pci_alloc_irq_vectors() call by removing it from bnxt_hwrm_if_change(). bnxt_request_irq() later in the code path will call it and by then the MSIX table is properly mapped. Fixes: 4343838ca5eb ("bnxt_en: Replace deprecated PCI MSIX APIs") Suggested-by: Michael Chan Signed-off-by: Kashyap Desai Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cfc9ccab39bf..3b2ea36f25a2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12399,13 +12399,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) set_bit(BNXT_STATE_ABORT_ERR, &bp->state); return rc; } + /* IRQ will be initialized later in bnxt_request_irq()*/ bnxt_clear_int_mode(bp); - rc = bnxt_init_int_mode(bp); - if (rc) { - clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state); - netdev_err(bp->dev, "init int mode failed\n"); - return rc; - } } rc = bnxt_cancel_reservations(bp, fw_reset); } From c2d20a3814d1b57dea6db82229edd702bde9c878 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Mon, 28 Apr 2025 15:59:00 -0700 Subject: [PATCH 274/559] bnxt_en: delay pci_alloc_irq_vectors() in the AER path This patch is similar to the last patch to delay the pci_alloc_irq_vectors() call in the AER path until after calling bnxt_reserve_rings(). bnxt_reserve_rings() needs to properly map the MSIX table first before we call pci_alloc_irq_vectors() which may immediately write to the MSIX table in some architectures. Move the bnxt_init_int_mode() call from bnxt_io_slot_reset() to bnxt_io_resume() after calling bnxt_reserve_rings(). With this change, the AER path may call bnxt_open() -> bnxt_hwrm_if_change() with bp->irq_tbl set to NULL. bp->irq_tbl is cleared when we call bnxt_clear_int_mode() in bnxt_io_slot_reset(). So we cannot use !bp->irq_tbl to detect aborted FW reset. Add a new BNXT_FW_RESET_STATE_ABORT to detect aborted FW reset in bnxt_hwrm_if_change(). Signed-off-by: Kashyap Desai Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 +++++++++++------ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3b2ea36f25a2..78e496b0ec26 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12325,12 +12325,15 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) { struct hwrm_func_drv_if_change_output *resp; struct hwrm_func_drv_if_change_input *req; - bool fw_reset = !bp->irq_tbl; bool resc_reinit = false; bool caps_change = false; int rc, retry = 0; + bool fw_reset; u32 flags = 0; + fw_reset = (bp->fw_reset_state == BNXT_FW_RESET_STATE_ABORT); + bp->fw_reset_state = 0; + if (!(bp->fw_cap & BNXT_FW_CAP_IF_CHANGE)) return 0; @@ -14833,7 +14836,7 @@ static void bnxt_fw_reset_abort(struct bnxt *bp, int rc) clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) bnxt_dl_health_fw_status_update(bp, false); - bp->fw_reset_state = 0; + bp->fw_reset_state = BNXT_FW_RESET_STATE_ABORT; netif_close(bp->dev); } @@ -16931,10 +16934,9 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) if (!err) result = PCI_ERS_RESULT_RECOVERED; + /* IRQ will be initialized later in bnxt_io_resume */ bnxt_ulp_irq_stop(bp); bnxt_clear_int_mode(bp); - err = bnxt_init_int_mode(bp); - bnxt_ulp_irq_restart(bp, err); } reset_exit: @@ -16963,10 +16965,13 @@ static void bnxt_io_resume(struct pci_dev *pdev) err = bnxt_hwrm_func_qcaps(bp); if (!err) { - if (netif_running(netdev)) + if (netif_running(netdev)) { err = bnxt_open(netdev); - else + } else { err = bnxt_reserve_rings(bp, true); + if (!err) + err = bnxt_init_int_mode(bp); + } } if (!err) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 21726cf56586..bc8b3b7e915d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2614,6 +2614,7 @@ struct bnxt { #define BNXT_FW_RESET_STATE_POLL_FW 4 #define BNXT_FW_RESET_STATE_OPENING 5 #define BNXT_FW_RESET_STATE_POLL_FW_DOWN 6 +#define BNXT_FW_RESET_STATE_ABORT 7 u16 fw_reset_min_dsecs; #define BNXT_DFLT_FW_RST_MIN_DSECS 20 From ea9376cf68230e05492f22ca45d329f16e262c7b Mon Sep 17 00:00:00 2001 From: Shruti Parab Date: Mon, 28 Apr 2025 15:59:01 -0700 Subject: [PATCH 275/559] bnxt_en: Fix coredump logic to free allocated buffer When handling HWRM_DBG_COREDUMP_LIST FW command in bnxt_hwrm_dbg_dma_data(), the allocated buffer info->dest_buf is not freed in the error path. In the normal path, info->dest_buf is assigned to coredump->data and it will eventually be freed after the coredump is collected. Free info->dest_buf immediately inside bnxt_hwrm_dbg_dma_data() in the error path. Fixes: c74751f4c392 ("bnxt_en: Return error if FW returns more data than dump length") Reported-by: Michael Chan Reviewed-by: Kalesh AP Signed-off-by: Shruti Parab Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 5576e7cf8463..9a74793648f6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -116,6 +116,11 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, memcpy(info->dest_buf + off, dma_buf, len); } else { rc = -ENOBUFS; + if (cmn_req->req_type == + cpu_to_le16(HWRM_DBG_COREDUMP_LIST)) { + kfree(info->dest_buf); + info->dest_buf = NULL; + } break; } } From 6b87bd94f34370bbf1dfa59352bed8efab5bf419 Mon Sep 17 00:00:00 2001 From: Shruti Parab Date: Mon, 28 Apr 2025 15:59:02 -0700 Subject: [PATCH 276/559] bnxt_en: Fix out-of-bound memcpy() during ethtool -w When retrieving the FW coredump using ethtool, it can sometimes cause memory corruption: BUG: KFENCE: memory corruption in __bnxt_get_coredump+0x3ef/0x670 [bnxt_en] Corrupted memory at 0x000000008f0f30e8 [ ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ] (in kfence-#45): __bnxt_get_coredump+0x3ef/0x670 [bnxt_en] ethtool_get_dump_data+0xdc/0x1a0 __dev_ethtool+0xa1e/0x1af0 dev_ethtool+0xa8/0x170 dev_ioctl+0x1b5/0x580 sock_do_ioctl+0xab/0xf0 sock_ioctl+0x1ce/0x2e0 __x64_sys_ioctl+0x87/0xc0 do_syscall_64+0x5c/0xf0 entry_SYSCALL_64_after_hwframe+0x78/0x80 ... This happens when copying the coredump segment list in bnxt_hwrm_dbg_dma_data() with the HWRM_DBG_COREDUMP_LIST FW command. The info->dest_buf buffer is allocated based on the number of coredump segments returned by the FW. The segment list is then DMA'ed by the FW and the length of the DMA is returned by FW. The driver then copies this DMA'ed segment list to info->dest_buf. In some cases, this DMA length may exceed the info->dest_buf length and cause the above BUG condition. Fix it by capping the copy length to not exceed the length of info->dest_buf. The extra DMA data contains no useful information. This code path is shared for the HWRM_DBG_COREDUMP_LIST and the HWRM_DBG_COREDUMP_RETRIEVE FW commands. The buffering is different for these 2 FW commands. To simplify the logic, we need to move the line to adjust the buffer length for HWRM_DBG_COREDUMP_RETRIEVE up, so that the new check to cap the copy length will work for both commands. Fixes: c74751f4c392 ("bnxt_en: Return error if FW returns more data than dump length") Reviewed-by: Kalesh AP Signed-off-by: Shruti Parab Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_coredump.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 9a74793648f6..a000d3f630bd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -110,10 +110,19 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, } } + if (cmn_req->req_type == + cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE)) + info->dest_buf_size += len; + if (info->dest_buf) { if ((info->seg_start + off + len) <= BNXT_COREDUMP_BUF_LEN(info->buf_len)) { - memcpy(info->dest_buf + off, dma_buf, len); + u16 copylen = min_t(u16, len, + info->dest_buf_size - off); + + memcpy(info->dest_buf + off, dma_buf, copylen); + if (copylen < len) + break; } else { rc = -ENOBUFS; if (cmn_req->req_type == @@ -125,10 +134,6 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, } } - if (cmn_req->req_type == - cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE)) - info->dest_buf_size += len; - if (!(cmn_resp->flags & HWRM_DBG_CMN_FLAGS_MORE)) break; From 02e8be5a032cae0f4ca33c6053c44d83cf4acc93 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 28 Apr 2025 15:59:03 -0700 Subject: [PATCH 277/559] bnxt_en: Fix ethtool -d byte order for 32-bit values For version 1 register dump that includes the PCIe stats, the existing code incorrectly assumes that all PCIe stats are 64-bit values. Fix it by using an array containing the starting and ending index of the 32-bit values. The loop in bnxt_get_regs() will use the array to do proper endian swap for the 32-bit values. Fixes: b5d600b027eb ("bnxt_en: Add support for 'ethtool -d'") Reviewed-by: Shruti Parab Reviewed-by: Kalesh AP Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 36 ++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 7be37976f3e4..f5d490bf997e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2062,6 +2062,17 @@ static int bnxt_get_regs_len(struct net_device *dev) return reg_len; } +#define BNXT_PCIE_32B_ENTRY(start, end) \ + { offsetof(struct pcie_ctx_hw_stats, start), \ + offsetof(struct pcie_ctx_hw_stats, end) } + +static const struct { + u16 start; + u16 end; +} bnxt_pcie_32b_entries[] = { + BNXT_PCIE_32B_ENTRY(pcie_ltssm_histogram[0], pcie_ltssm_histogram[3]), +}; + static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) { @@ -2094,12 +2105,27 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, req->pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr); rc = hwrm_req_send(bp, req); if (!rc) { - __le64 *src = (__le64 *)hw_pcie_stats; - u64 *dst = (u64 *)(_p + BNXT_PXP_REG_LEN); - int i; + u8 *dst = (u8 *)(_p + BNXT_PXP_REG_LEN); + u8 *src = (u8 *)hw_pcie_stats; + int i, j; - for (i = 0; i < sizeof(*hw_pcie_stats) / sizeof(__le64); i++) - dst[i] = le64_to_cpu(src[i]); + for (i = 0, j = 0; i < sizeof(*hw_pcie_stats); ) { + if (i >= bnxt_pcie_32b_entries[j].start && + i <= bnxt_pcie_32b_entries[j].end) { + u32 *dst32 = (u32 *)(dst + i); + + *dst32 = le32_to_cpu(*(__le32 *)(src + i)); + i += 4; + if (i > bnxt_pcie_32b_entries[j].end && + j < ARRAY_SIZE(bnxt_pcie_32b_entries) - 1) + j++; + } else { + u64 *dst64 = (u64 *)(dst + i); + + *dst64 = le64_to_cpu(*(__le64 *)(src + i)); + i += 8; + } + } } hwrm_req_drop(bp, req); } From 77e40bbce93059658aee02786a32c5c98a240a8a Mon Sep 17 00:00:00 2001 From: Michael Liang Date: Tue, 29 Apr 2025 10:42:01 -0600 Subject: [PATCH 278/559] nvme-tcp: fix premature queue removal and I/O failover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch addresses a data corruption issue observed in nvme-tcp during testing. In an NVMe native multipath setup, when an I/O timeout occurs, all inflight I/Os are canceled almost immediately after the kernel socket is shut down. These canceled I/Os are reported as host path errors, triggering a failover that succeeds on a different path. However, at this point, the original I/O may still be outstanding in the host's network transmission path (e.g., the NIC’s TX queue). From the user-space app's perspective, the buffer associated with the I/O is considered completed since they're acked on the different path and may be reused for new I/O requests. Because nvme-tcp enables zero-copy by default in the transmission path, this can lead to corrupted data being sent to the original target, ultimately causing data corruption. We can reproduce this data corruption by injecting delay on one path and triggering i/o timeout. To prevent this issue, this change ensures that all inflight transmissions are fully completed from host's perspective before returning from queue stop. To handle concurrent I/O timeout from multiple namespaces under the same controller, always wait in queue stop regardless of queue's state. This aligns with the behavior of queue stopping in other NVMe fabric transports. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Michael Liang Reviewed-by: Mohamed Khalfella Reviewed-by: Randy Jennings Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 72d260201d8c..aba365f97cf6 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1946,7 +1946,7 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) cancel_work_sync(&queue->io_work); } -static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +static void nvme_tcp_stop_queue_nowait(struct nvme_ctrl *nctrl, int qid) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; @@ -1965,6 +1965,31 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) mutex_unlock(&queue->queue_lock); } +static void nvme_tcp_wait_queue(struct nvme_ctrl *nctrl, int qid) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[qid]; + int timeout = 100; + + while (timeout > 0) { + if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags) || + !sk_wmem_alloc_get(queue->sock->sk)) + return; + msleep(2); + timeout -= 2; + } + dev_warn(nctrl->device, + "qid %d: timeout draining sock wmem allocation expired\n", + qid); +} + +static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +{ + nvme_tcp_stop_queue_nowait(nctrl, qid); + nvme_tcp_wait_queue(nctrl, qid); +} + + static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) { write_lock_bh(&queue->sock->sk->sk_callback_lock); @@ -2032,7 +2057,9 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) int i; for (i = 1; i < ctrl->queue_count; i++) - nvme_tcp_stop_queue(ctrl, i); + nvme_tcp_stop_queue_nowait(ctrl, i); + for (i = 1; i < ctrl->queue_count; i++) + nvme_tcp_wait_queue(ctrl, i); } static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl, From 521987940ad4fd37fe3d0340ec6f39c4e8e91e36 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Wed, 30 Apr 2025 08:40:25 +1000 Subject: [PATCH 279/559] nvme-tcp: select CONFIG_TLS from CONFIG_NVME_TCP_TLS Ensure that TLS support is enabled in the kernel when CONFIG_NVME_TCP_TLS is enabled. Without this the code compiles, but does not actually work unless something else enables CONFIG_TLS. Fixes: be8e82caa68 ("nvme-tcp: enable TLS handshake upcall") Signed-off-by: Alistair Francis Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index d47dfa80fb95..4d64b6935bb9 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -102,6 +102,7 @@ config NVME_TCP_TLS depends on NVME_TCP select NET_HANDSHAKE select KEYS + select TLS help Enables TLS encryption for NVMe TCP using the netlink handshake API. From ac38b7ef704c0659568fd4b2c7e6c1255fc51798 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Wed, 30 Apr 2025 08:23:47 +1000 Subject: [PATCH 280/559] nvmet-tcp: select CONFIG_TLS from CONFIG_NVME_TARGET_TCP_TLS Ensure that TLS support is enabled in the kernel when CONFIG_NVME_TARGET_TCP_TLS is enabled. Without this the code compiles, but does not actually work unless something else enables CONFIG_TLS. Fixes: 675b453e0241 ("nvmet-tcp: enable TLS handshake upcall") Signed-off-by: Alistair Francis Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index fb7446d6d682..4c253b433bf7 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -98,6 +98,7 @@ config NVME_TARGET_TCP_TLS bool "NVMe over Fabrics TCP target TLS encryption support" depends on NVME_TARGET_TCP select NET_HANDSHAKE + select TLS help Enables TLS encryption for the NVMe TCP target using the netlink handshake API. From 46d22b47df2741996af277a2838b95f130436c13 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Wed, 23 Apr 2025 16:06:21 +1000 Subject: [PATCH 281/559] nvmet-tcp: don't restore null sk_state_change queue->state_change is set as part of nvmet_tcp_set_queue_sock(), but if the TCP connection isn't established when nvmet_tcp_set_queue_sock() is called then queue->state_change isn't set and sock->sk->sk_state_change isn't replaced. As such we don't need to restore sock->sk->sk_state_change if queue->state_change is NULL. This avoids NULL pointer dereferences such as this: [ 286.462026][ C0] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 286.462814][ C0] #PF: supervisor instruction fetch in kernel mode [ 286.463796][ C0] #PF: error_code(0x0010) - not-present page [ 286.464392][ C0] PGD 8000000140620067 P4D 8000000140620067 PUD 114201067 PMD 0 [ 286.465086][ C0] Oops: Oops: 0010 [#1] SMP KASAN PTI [ 286.465559][ C0] CPU: 0 UID: 0 PID: 1628 Comm: nvme Not tainted 6.15.0-rc2+ #11 PREEMPT(voluntary) [ 286.466393][ C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-3.fc41 04/01/2014 [ 286.467147][ C0] RIP: 0010:0x0 [ 286.467420][ C0] Code: Unable to access opcode bytes at 0xffffffffffffffd6. [ 286.467977][ C0] RSP: 0018:ffff8883ae008580 EFLAGS: 00010246 [ 286.468425][ C0] RAX: 0000000000000000 RBX: ffff88813fd34100 RCX: ffffffffa386cc43 [ 286.469019][ C0] RDX: 1ffff11027fa68b6 RSI: 0000000000000008 RDI: ffff88813fd34100 [ 286.469545][ C0] RBP: ffff88813fd34160 R08: 0000000000000000 R09: ffffed1027fa682c [ 286.470072][ C0] R10: ffff88813fd34167 R11: 0000000000000000 R12: ffff88813fd344c3 [ 286.470585][ C0] R13: ffff88813fd34112 R14: ffff88813fd34aec R15: ffff888132cdd268 [ 286.471070][ C0] FS: 00007fe3c04c7d80(0000) GS:ffff88840743f000(0000) knlGS:0000000000000000 [ 286.471644][ C0] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 286.472543][ C0] CR2: ffffffffffffffd6 CR3: 000000012daca000 CR4: 00000000000006f0 [ 286.473500][ C0] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 286.474467][ C0] DR3: 0000000000000000 DR6: 00000000ffff07f0 DR7: 0000000000000400 [ 286.475453][ C0] Call Trace: [ 286.476102][ C0] [ 286.476719][ C0] tcp_fin+0x2bb/0x440 [ 286.477429][ C0] tcp_data_queue+0x190f/0x4e60 [ 286.478174][ C0] ? __build_skb_around+0x234/0x330 [ 286.478940][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.479659][ C0] ? __pfx_tcp_data_queue+0x10/0x10 [ 286.480431][ C0] ? tcp_try_undo_loss+0x640/0x6c0 [ 286.481196][ C0] ? seqcount_lockdep_reader_access.constprop.0+0x82/0x90 [ 286.482046][ C0] ? kvm_clock_get_cycles+0x14/0x30 [ 286.482769][ C0] ? ktime_get+0x66/0x150 [ 286.483433][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.484146][ C0] tcp_rcv_established+0x6e4/0x2050 [ 286.484857][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.485523][ C0] ? ipv4_dst_check+0x160/0x2b0 [ 286.486203][ C0] ? __pfx_tcp_rcv_established+0x10/0x10 [ 286.486917][ C0] ? lock_release+0x217/0x2c0 [ 286.487595][ C0] tcp_v4_do_rcv+0x4d6/0x9b0 [ 286.488279][ C0] tcp_v4_rcv+0x2af8/0x3e30 [ 286.488904][ C0] ? raw_local_deliver+0x51b/0xad0 [ 286.489551][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.490198][ C0] ? __pfx_tcp_v4_rcv+0x10/0x10 [ 286.490813][ C0] ? __pfx_raw_local_deliver+0x10/0x10 [ 286.491487][ C0] ? __pfx_nf_confirm+0x10/0x10 [nf_conntrack] [ 286.492275][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.492900][ C0] ip_protocol_deliver_rcu+0x8f/0x370 [ 286.493579][ C0] ip_local_deliver_finish+0x297/0x420 [ 286.494268][ C0] ip_local_deliver+0x168/0x430 [ 286.494867][ C0] ? __pfx_ip_local_deliver+0x10/0x10 [ 286.495498][ C0] ? __pfx_ip_local_deliver_finish+0x10/0x10 [ 286.496204][ C0] ? ip_rcv_finish_core+0x19a/0x1f20 [ 286.496806][ C0] ? lock_release+0x217/0x2c0 [ 286.497414][ C0] ip_rcv+0x455/0x6e0 [ 286.497945][ C0] ? __pfx_ip_rcv+0x10/0x10 [ 286.498550][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.499137][ C0] ? __pfx_ip_rcv_finish+0x10/0x10 [ 286.499763][ C0] ? lock_release+0x217/0x2c0 [ 286.500327][ C0] ? dl_scaled_delta_exec+0xd1/0x2c0 [ 286.500922][ C0] ? __pfx_ip_rcv+0x10/0x10 [ 286.501480][ C0] __netif_receive_skb_one_core+0x166/0x1b0 [ 286.502173][ C0] ? __pfx___netif_receive_skb_one_core+0x10/0x10 [ 286.502903][ C0] ? lock_acquire+0x2b2/0x310 [ 286.503487][ C0] ? process_backlog+0x372/0x1350 [ 286.504087][ C0] ? lock_release+0x217/0x2c0 [ 286.504642][ C0] process_backlog+0x3b9/0x1350 [ 286.505214][ C0] ? process_backlog+0x372/0x1350 [ 286.505779][ C0] __napi_poll.constprop.0+0xa6/0x490 [ 286.506363][ C0] net_rx_action+0x92e/0xe10 [ 286.506889][ C0] ? __pfx_net_rx_action+0x10/0x10 [ 286.507437][ C0] ? timerqueue_add+0x1f0/0x320 [ 286.507977][ C0] ? sched_clock_cpu+0x68/0x540 [ 286.508492][ C0] ? lock_acquire+0x2b2/0x310 [ 286.509043][ C0] ? kvm_sched_clock_read+0xd/0x20 [ 286.509607][ C0] ? handle_softirqs+0x1aa/0x7d0 [ 286.510187][ C0] handle_softirqs+0x1f2/0x7d0 [ 286.510754][ C0] ? __pfx_handle_softirqs+0x10/0x10 [ 286.511348][ C0] ? irqtime_account_irq+0x181/0x290 [ 286.511937][ C0] ? __dev_queue_xmit+0x85d/0x3450 [ 286.512510][ C0] do_softirq.part.0+0x89/0xc0 [ 286.513100][ C0] [ 286.513548][ C0] [ 286.513953][ C0] __local_bh_enable_ip+0x112/0x140 [ 286.514522][ C0] ? __dev_queue_xmit+0x85d/0x3450 [ 286.515072][ C0] __dev_queue_xmit+0x872/0x3450 [ 286.515619][ C0] ? nft_do_chain+0xe16/0x15b0 [nf_tables] [ 286.516252][ C0] ? __pfx___dev_queue_xmit+0x10/0x10 [ 286.516817][ C0] ? selinux_ip_postroute+0x43c/0xc50 [ 286.517433][ C0] ? __pfx_selinux_ip_postroute+0x10/0x10 [ 286.518061][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.518606][ C0] ? ip_output+0x164/0x4a0 [ 286.519149][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.519671][ C0] ? ip_finish_output2+0x17d5/0x1fb0 [ 286.520258][ C0] ip_finish_output2+0xb4b/0x1fb0 [ 286.520787][ C0] ? __pfx_ip_finish_output2+0x10/0x10 [ 286.521355][ C0] ? __ip_finish_output+0x15d/0x750 [ 286.521890][ C0] ip_output+0x164/0x4a0 [ 286.522372][ C0] ? __pfx_ip_output+0x10/0x10 [ 286.522872][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.523402][ C0] ? _raw_spin_unlock_irqrestore+0x4c/0x60 [ 286.524031][ C0] ? __pfx_ip_finish_output+0x10/0x10 [ 286.524605][ C0] ? __ip_queue_xmit+0x999/0x2260 [ 286.525200][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.525744][ C0] ? ipv4_dst_check+0x16a/0x2b0 [ 286.526279][ C0] ? lock_release+0x217/0x2c0 [ 286.526793][ C0] __ip_queue_xmit+0x1883/0x2260 [ 286.527324][ C0] ? __skb_clone+0x54c/0x730 [ 286.527827][ C0] __tcp_transmit_skb+0x209b/0x37a0 [ 286.528374][ C0] ? __pfx___tcp_transmit_skb+0x10/0x10 [ 286.528952][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.529472][ C0] ? seqcount_lockdep_reader_access.constprop.0+0x82/0x90 [ 286.530152][ C0] ? trace_hardirqs_on+0x12/0x120 [ 286.530691][ C0] tcp_write_xmit+0xb81/0x88b0 [ 286.531224][ C0] ? mod_memcg_state+0x4d/0x60 [ 286.531736][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.532253][ C0] __tcp_push_pending_frames+0x90/0x320 [ 286.532826][ C0] tcp_send_fin+0x141/0xb50 [ 286.533352][ C0] ? __pfx_tcp_send_fin+0x10/0x10 [ 286.533908][ C0] ? __local_bh_enable_ip+0xab/0x140 [ 286.534495][ C0] inet_shutdown+0x243/0x320 [ 286.535077][ C0] nvme_tcp_alloc_queue+0xb3b/0x2590 [nvme_tcp] [ 286.535709][ C0] ? do_raw_spin_lock+0x129/0x260 [ 286.536314][ C0] ? __pfx_nvme_tcp_alloc_queue+0x10/0x10 [nvme_tcp] [ 286.536996][ C0] ? do_raw_spin_unlock+0x54/0x1e0 [ 286.537550][ C0] ? _raw_spin_unlock+0x29/0x50 [ 286.538127][ C0] ? do_raw_spin_lock+0x129/0x260 [ 286.538664][ C0] ? __pfx_do_raw_spin_lock+0x10/0x10 [ 286.539249][ C0] ? nvme_tcp_alloc_admin_queue+0xd5/0x340 [nvme_tcp] [ 286.539892][ C0] ? __wake_up+0x40/0x60 [ 286.540392][ C0] nvme_tcp_alloc_admin_queue+0xd5/0x340 [nvme_tcp] [ 286.541047][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.541589][ C0] nvme_tcp_setup_ctrl+0x8b/0x7a0 [nvme_tcp] [ 286.542254][ C0] ? _raw_spin_unlock_irqrestore+0x4c/0x60 [ 286.542887][ C0] ? __pfx_nvme_tcp_setup_ctrl+0x10/0x10 [nvme_tcp] [ 286.543568][ C0] ? trace_hardirqs_on+0x12/0x120 [ 286.544166][ C0] ? _raw_spin_unlock_irqrestore+0x35/0x60 [ 286.544792][ C0] ? nvme_change_ctrl_state+0x196/0x2e0 [nvme_core] [ 286.545477][ C0] nvme_tcp_create_ctrl+0x839/0xb90 [nvme_tcp] [ 286.546126][ C0] nvmf_dev_write+0x3db/0x7e0 [nvme_fabrics] [ 286.546775][ C0] ? rw_verify_area+0x69/0x520 [ 286.547334][ C0] vfs_write+0x218/0xe90 [ 286.547854][ C0] ? do_syscall_64+0x9f/0x190 [ 286.548408][ C0] ? trace_hardirqs_on_prepare+0xdb/0x120 [ 286.549037][ C0] ? syscall_exit_to_user_mode+0x93/0x280 [ 286.549659][ C0] ? __pfx_vfs_write+0x10/0x10 [ 286.550259][ C0] ? do_syscall_64+0x9f/0x190 [ 286.550840][ C0] ? syscall_exit_to_user_mode+0x8e/0x280 [ 286.551516][ C0] ? trace_hardirqs_on_prepare+0xdb/0x120 [ 286.552180][ C0] ? syscall_exit_to_user_mode+0x93/0x280 [ 286.552834][ C0] ? ksys_read+0xf5/0x1c0 [ 286.553386][ C0] ? __pfx_ksys_read+0x10/0x10 [ 286.553964][ C0] ksys_write+0xf5/0x1c0 [ 286.554499][ C0] ? __pfx_ksys_write+0x10/0x10 [ 286.555072][ C0] ? trace_hardirqs_on_prepare+0xdb/0x120 [ 286.555698][ C0] ? syscall_exit_to_user_mode+0x93/0x280 [ 286.556319][ C0] ? do_syscall_64+0x54/0x190 [ 286.556866][ C0] do_syscall_64+0x93/0x190 [ 286.557420][ C0] ? rcu_read_unlock+0x17/0x60 [ 286.557986][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.558526][ C0] ? lock_release+0x217/0x2c0 [ 286.559087][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.559659][ C0] ? count_memcg_events.constprop.0+0x4a/0x60 [ 286.560476][ C0] ? exc_page_fault+0x7a/0x110 [ 286.561064][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.561647][ C0] ? lock_release+0x217/0x2c0 [ 286.562257][ C0] ? do_user_addr_fault+0x171/0xa00 [ 286.562839][ C0] ? do_user_addr_fault+0x4a2/0xa00 [ 286.563453][ C0] ? irqentry_exit_to_user_mode+0x84/0x270 [ 286.564112][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.564677][ C0] ? irqentry_exit_to_user_mode+0x84/0x270 [ 286.565317][ C0] ? trace_hardirqs_on_prepare+0xdb/0x120 [ 286.565922][ C0] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 286.566542][ C0] RIP: 0033:0x7fe3c05e6504 [ 286.567102][ C0] Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d c5 8b 10 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 [ 286.568931][ C0] RSP: 002b:00007fff76444f58 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 [ 286.569807][ C0] RAX: ffffffffffffffda RBX: 000000003b40d930 RCX: 00007fe3c05e6504 [ 286.570621][ C0] RDX: 00000000000000cf RSI: 000000003b40d930 RDI: 0000000000000003 [ 286.571443][ C0] RBP: 0000000000000003 R08: 00000000000000cf R09: 000000003b40d930 [ 286.572246][ C0] R10: 0000000000000000 R11: 0000000000000202 R12: 000000003b40cd60 [ 286.573069][ C0] R13: 00000000000000cf R14: 00007fe3c07417f8 R15: 00007fe3c073502e [ 286.573886][ C0] Closes: https://lore.kernel.org/linux-nvme/5hdonndzoqa265oq3bj6iarwtfk5dewxxjtbjvn5uqnwclpwt6@a2n6w3taxxex/ Signed-off-by: Alistair Francis Reviewed-by: Sagi Grimberg Tested-by: Shin'ichiro Kawasaki Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index f2d0c920269b..12a5cb8641ca 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1560,6 +1560,9 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) { struct socket *sock = queue->sock; + if (!queue->state_change) + return; + write_lock_bh(&sock->sk->sk_callback_lock); sock->sk->sk_data_ready = queue->data_ready; sock->sk->sk_state_change = queue->state_change; From 8edb86b2ed1d63cc400aecae8eb8c8114837171a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 25 Apr 2025 11:34:34 +0200 Subject: [PATCH 282/559] nvmet-auth: always free derived key data After calling nvme_auth_derive_tls_psk() we need to free the resulting psk data, as either TLS is disable (and we don't need the data anyway) or the psk data is copied into the resulting key (and can be free, too). Fixes: fa2e0f8bbc68 ("nvmet-tcp: support secure channel concatenation") Reported-by: Yi Zhang Suggested-by: Maurizio Lombardi Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Tested-by: Yi Zhang Signed-off-by: Christoph Hellwig --- drivers/nvme/target/auth.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index cef8d77f477b..9429b8218408 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -600,13 +600,12 @@ void nvmet_auth_insert_psk(struct nvmet_sq *sq) pr_warn("%s: ctrl %d qid %d failed to refresh key, error %ld\n", __func__, sq->ctrl->cntlid, sq->qid, PTR_ERR(tls_key)); tls_key = NULL; - kfree_sensitive(tls_psk); } if (sq->ctrl->tls_key) key_put(sq->ctrl->tls_key); sq->ctrl->tls_key = tls_key; #endif - + kfree_sensitive(tls_psk); out_free_digest: kfree_sensitive(digest); out_free_psk: From f024d3a8ded0d8d2129ae123d7a5305c29ca44ce Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 30 Apr 2025 07:17:17 -0600 Subject: [PATCH 283/559] io_uring/fdinfo: annotate racy sq/cq head/tail reads syzbot complains about the cached sq head read, and it's totally right. But we don't need to care, it's just reading fdinfo, and reading the CQ or SQ tail/head entries are known racy in that they are just a view into that very instant and may of course be outdated by the time they are reported. Annotate both the SQ head and CQ tail read with data_race() to avoid this syzbot complaint. Link: https://lore.kernel.org/io-uring/6811f6dc.050a0220.39e3a1.0d0e.GAE@google.com/ Reported-by: syzbot+3e77fd302e99f5af9394@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- io_uring/fdinfo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index f60d0a9d505e..9414ca6d101c 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -123,11 +123,11 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) seq_printf(m, "SqMask:\t0x%x\n", sq_mask); seq_printf(m, "SqHead:\t%u\n", sq_head); seq_printf(m, "SqTail:\t%u\n", sq_tail); - seq_printf(m, "CachedSqHead:\t%u\n", ctx->cached_sq_head); + seq_printf(m, "CachedSqHead:\t%u\n", data_race(ctx->cached_sq_head)); seq_printf(m, "CqMask:\t0x%x\n", cq_mask); seq_printf(m, "CqHead:\t%u\n", cq_head); seq_printf(m, "CqTail:\t%u\n", cq_tail); - seq_printf(m, "CachedCqTail:\t%u\n", ctx->cached_cq_tail); + seq_printf(m, "CachedCqTail:\t%u\n", data_race(ctx->cached_cq_tail)); seq_printf(m, "SQEs:\t%u\n", sq_tail - sq_head); sq_entries = min(sq_tail - sq_head, ctx->sq_entries); for (i = 0; i < sq_entries; i++) { From 533a8a67cc3b072a24122d1ea10a8db66a5c9393 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 28 Apr 2025 21:50:07 +0200 Subject: [PATCH 284/559] soundwire: intel_auxdevice: Fix system suspend/resume handling Before commit bca84a7b93fd ("PM: sleep: Use DPM_FLAG_SMART_SUSPEND conditionally") the runtime PM status of the device in intel_resume() had always been RPM_ACTIVE because setting DPM_FLAG_SMART_SUSPEND had caused the core to call pm_runtime_set_active() for that device during the "noirq" resume phase. For this reason, the pm_runtime_suspended() check in intel_resume() had never triggered and the code depending on it had never run. That had not caused any observable functional issues to appear, so effectively the code in question had never been needed. After commit bca84a7b93fd the core does not call pm_runtime_set_active() for all devices with DPM_FLAG_SMART_SUSPEND set any more and the code depending on the pm_runtime_suspended() check in intel_resume() runs if the device is runtime-suspended prior to a system-wide suspend transition. Unfortunately, when it runs, it breaks things due to the attempt to runtime-resume bus->dev which most likely is not ready for a runtime resume at that point. It also does other more-or-less questionable things. Namely, it calls pm_runtime_idle() for a device with a nonzero runtime PM usage counter which has no effect (all devices have nonzero runtime PM usage counters during system-wide suspend and resume). It also calls pm_runtime_mark_last_busy() for the device even though devices cannot runtime-suspend during system-wide suspend and resume (because their runtime PM usage counters are nonzero) and an analogous call is made in the same function later. Moreover, it sets the runtime PM status of the device to RPM_ACTIVE before activating it. For the reasons listed above, remove that code altogether. On top of that, add a pm_runtime_disable() call to intel_suspend() to prevent the device from being runtime-resumed at any point after intel_suspend() has started to manipulate it because the changes made by that function would be undone by a runtime-suspend of the device. Next, once runtime PM has been disabled, the runtime PM status of the device cannot change, so pm_runtime_status_suspended() can be used instead of pm_runtime_suspended() in intel_suspend(). Finally, make intel_resume() call pm_runtime_set_active() at the end to set the runtime PM status of the device to "active" because it has just been activated and re-enable runtime PM for it after that. Additionally, drop the setting of DPM_FLAG_SMART_SUSPEND from the driver because it has no effect on devices handled by it. Fixes: bca84a7b93fd ("PM: sleep: Use DPM_FLAG_SMART_SUSPEND conditionally") Reported-by: Bard Liao Tested-by: Bard Liao Signed-off-by: Rafael J. Wysocki Acked-by: Bard Liao Link: https://patch.msgid.link/12680420.O9o76ZdvQC@rjwysocki.net --- drivers/soundwire/intel_auxdevice.c | 36 +++++++++++------------------ 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c index 5ea6399e6c9b..10a602d4843a 100644 --- a/drivers/soundwire/intel_auxdevice.c +++ b/drivers/soundwire/intel_auxdevice.c @@ -353,9 +353,6 @@ static int intel_link_probe(struct auxiliary_device *auxdev, /* use generic bandwidth allocation algorithm */ sdw->cdns.bus.compute_params = sdw_compute_params; - /* avoid resuming from pm_runtime suspend if it's not required */ - dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); - ret = sdw_bus_master_add(bus, dev, dev->fwnode); if (ret) { dev_err(dev, "sdw_bus_master_add fail: %d\n", ret); @@ -640,7 +637,10 @@ static int __maybe_unused intel_suspend(struct device *dev) return 0; } - if (pm_runtime_suspended(dev)) { + /* Prevent runtime PM from racing with the code below. */ + pm_runtime_disable(dev); + + if (pm_runtime_status_suspended(dev)) { dev_dbg(dev, "pm_runtime status: suspended\n"); clock_stop_quirks = sdw->link_res->clock_stop_quirks; @@ -648,7 +648,7 @@ static int __maybe_unused intel_suspend(struct device *dev) if ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) || !clock_stop_quirks) { - if (pm_runtime_suspended(dev->parent)) { + if (pm_runtime_status_suspended(dev->parent)) { /* * paranoia check: this should not happen with the .prepare * resume to full power @@ -715,7 +715,6 @@ static int __maybe_unused intel_resume(struct device *dev) struct sdw_cdns *cdns = dev_get_drvdata(dev); struct sdw_intel *sdw = cdns_to_intel(cdns); struct sdw_bus *bus = &cdns->bus; - int link_flags; int ret; if (bus->prop.hw_disabled || !sdw->startup_done) { @@ -724,23 +723,6 @@ static int __maybe_unused intel_resume(struct device *dev) return 0; } - if (pm_runtime_suspended(dev)) { - dev_dbg(dev, "pm_runtime status was suspended, forcing active\n"); - - /* follow required sequence from runtime_pm.rst */ - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_mark_last_busy(dev); - pm_runtime_enable(dev); - - pm_runtime_resume(bus->dev); - - link_flags = md_flags >> (bus->link_id * 8); - - if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE)) - pm_runtime_idle(dev); - } - ret = sdw_intel_link_power_up(sdw); if (ret) { dev_err(dev, "%s failed: %d\n", __func__, ret); @@ -760,6 +742,14 @@ static int __maybe_unused intel_resume(struct device *dev) return ret; } + /* + * Runtime PM has been disabled in intel_suspend(), so set the status + * to active because the device has just been resumed and re-enable + * runtime PM. + */ + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + /* * after system resume, the pm_runtime suspend() may kick in * during the enumeration, before any children device force the From ac4e04d9e378f5aa826c2406ad7871ae1b6a6fb9 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 29 Apr 2025 14:07:11 -0700 Subject: [PATCH 285/559] cpufreq: intel_pstate: Unchecked MSR aceess in legacy mode When turbo mode is unavailable on a Skylake-X system, executing the command: # echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo results in an unchecked MSR access error: WRMSR to 0x199 (attempted to write 0x0000000100001300). This issue was reproduced on an OEM (Original Equipment Manufacturer) system and is not a common problem across all Skylake-X systems. This error occurs because the MSR 0x199 Turbo Engage Bit (bit 32) is set when turbo mode is disabled. The issue arises when intel_pstate fails to detect that turbo mode is disabled. Here intel_pstate relies on MSR_IA32_MISC_ENABLE bit 38 to determine the status of turbo mode. However, on this system, bit 38 is not set even when turbo mode is disabled. According to the Intel Software Developer's Manual (SDM), the BIOS sets this bit during platform initialization to enable or disable opportunistic processor performance operations. Logically, this bit should be set in such cases. However, the SDM also specifies that "OS and applications must use CPUID leaf 06H to detect processors with opportunistic processor performance operations enabled." Therefore, in addition to checking MSR_IA32_MISC_ENABLE bit 38, verify that CPUID.06H:EAX[1] is 0 to accurately determine if turbo mode is disabled. Fixes: 4521e1a0ce17 ("cpufreq: intel_pstate: Reflect current no_turbo state correctly") Signed-off-by: Srinivas Pandruvada Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f41ed0b9e610..ba9bf06f1c77 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -598,6 +598,9 @@ static bool turbo_is_disabled(void) { u64 misc_en; + if (!cpu_feature_enabled(X86_FEATURE_IDA)) + return true; + rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); From 74c72419ec8da5cbc9c49410d3c44bb954538bdd Mon Sep 17 00:00:00 2001 From: Jethro Donaldson Date: Wed, 30 Apr 2025 00:59:15 +1200 Subject: [PATCH 286/559] smb: client: fix zero length for mkdir POSIX create context SMB create requests issued via smb311_posix_mkdir() have an incorrect length of zero bytes for the POSIX create context data. ksmbd server rejects such requests and logs "cli req too short" causing mkdir to fail with "invalid argument" on the client side. It also causes subsequent rmmod to crash in cifs_destroy_request_bufs() Inspection of packets sent by cifs.ko using wireshark show valid data for the SMB2_POSIX_CREATE_CONTEXT is appended with the correct offset, but with an incorrect length of zero bytes. Fails with ksmbd+cifs.ko only as Windows server/client does not use POSIX extensions. Fix smb311_posix_mkdir() to set req->CreateContextsLength as part of appending the POSIX creation context to the request. Signed-off-by: Jethro Donaldson Acked-by: Paulo Alcantara (Red Hat) Reviewed-by: Namjae Jeon Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index c4d52bebd37d..a9a49555d43b 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2921,6 +2921,7 @@ replay_again: req->CreateContextsOffset = cpu_to_le32( sizeof(struct smb2_create_req) + iov[1].iov_len); + le32_add_cpu(&req->CreateContextsLength, iov[n_iov-1].iov_len); pc_buf = iov[n_iov-1].iov_base; } From 1041c117a2c33cdffc4f695ac4b469e9124d24d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 30 Dec 2024 20:34:18 +0100 Subject: [PATCH 287/559] cifs: Fix and improve cifs_query_path_info() and cifs_query_file_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CAP_NT_SMBS was not negotiated then do not issue CIFSSMBQPathInfo() and CIFSSMBQFileInfo() commands. CIFSSMBQPathInfo() is not supported by non-NT Win9x SMB server and CIFSSMBQFileInfo() returns from Win9x SMB server bogus data in Attributes field (for example lot of files are marked as reparse points, even Win9x does not support them and read-only bit is not marked for read-only files). Correct information is returned by CIFSFindFirst() or SMBQueryInformation() command. So as a fallback in cifs_query_path_info() function use CIFSFindFirst() with SMB_FIND_FILE_FULL_DIRECTORY_INFO level which is supported by both NT and non-NT servers and as a last option use SMBQueryInformation() as it was before. And in function cifs_query_file_info() immediately returns -EOPNOTSUPP when not communicating with NT server. Client then revalidate inode entry by the cifs_query_path_info() call, which is working fine. So fstat() syscall on already opened file will receive correct information. Note that both fallback functions in non-UNICODE mode expands wildcards. Therefore those fallback functions cannot be used on paths which contain SMB wildcard characters (* ? " > <). CIFSFindFirst() returns all 4 time attributes as opposite of SMBQueryInformation() which returns only one. With this change it is possible to query all 4 times attributes from Win9x server and at the same time, client minimize sending of unsupported commands to server. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/smb1ops.c | 103 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 95 insertions(+), 8 deletions(-) diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 0adeec652dc1..5c7a80bce5bd 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -543,24 +543,104 @@ static int cifs_query_path_info(const unsigned int xid, const char *full_path, struct cifs_open_info_data *data) { - int rc; + int rc = -EOPNOTSUPP; FILE_ALL_INFO fi = {}; + struct cifs_search_info search_info = {}; + bool non_unicode_wildcard = false; data->reparse_point = false; data->adjust_tz = false; - /* could do find first instead but this returns more info */ - rc = CIFSSMBQPathInfo(xid, tcon, full_path, &fi, 0 /* not legacy */, cifs_sb->local_nls, - cifs_remap(cifs_sb)); /* - * BB optimize code so we do not make the above call when server claims - * no NT SMB support and the above call failed at least once - set flag - * in tcon or mount. + * First try CIFSSMBQPathInfo() function which returns more info + * (NumberOfLinks) than CIFSFindFirst() fallback function. + * Some servers like Win9x do not support SMB_QUERY_FILE_ALL_INFO over + * TRANS2_QUERY_PATH_INFORMATION, but supports it with filehandle over + * TRANS2_QUERY_FILE_INFORMATION (function CIFSSMBQFileInfo(). But SMB + * Open command on non-NT servers works only for files, does not work + * for directories. And moreover Win9x SMB server returns bogus data in + * SMB_QUERY_FILE_ALL_INFO Attributes field. So for non-NT servers, + * do not even use CIFSSMBQPathInfo() or CIFSSMBQFileInfo() function. */ - if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { + if (tcon->ses->capabilities & CAP_NT_SMBS) + rc = CIFSSMBQPathInfo(xid, tcon, full_path, &fi, 0 /* not legacy */, + cifs_sb->local_nls, cifs_remap(cifs_sb)); + + /* + * Non-UNICODE variant of fallback functions below expands wildcards, + * so they cannot be used for querying paths with wildcard characters. + */ + if (rc && !(tcon->ses->capabilities & CAP_UNICODE) && strpbrk(full_path, "*?\"><")) + non_unicode_wildcard = true; + + /* + * Then fallback to CIFSFindFirst() which works also with non-NT servers + * but does not does not provide NumberOfLinks. + */ + if ((rc == -EOPNOTSUPP || rc == -EINVAL) && + !non_unicode_wildcard) { + if (!(tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find)) + search_info.info_level = SMB_FIND_FILE_INFO_STANDARD; + else + search_info.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO; + rc = CIFSFindFirst(xid, tcon, full_path, cifs_sb, NULL, + CIFS_SEARCH_CLOSE_ALWAYS | CIFS_SEARCH_CLOSE_AT_END, + &search_info, false); + if (rc == 0) { + if (!(tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find)) { + FIND_FILE_STANDARD_INFO *di; + int offset = tcon->ses->server->timeAdj; + + di = (FIND_FILE_STANDARD_INFO *)search_info.srch_entries_start; + fi.CreationTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->CreationDate, di->CreationTime, offset))); + fi.LastAccessTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->LastAccessDate, di->LastAccessTime, offset))); + fi.LastWriteTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->LastWriteDate, di->LastWriteTime, offset))); + fi.ChangeTime = fi.LastWriteTime; + fi.Attributes = cpu_to_le32(le16_to_cpu(di->Attributes)); + fi.AllocationSize = cpu_to_le64(le32_to_cpu(di->AllocationSize)); + fi.EndOfFile = cpu_to_le64(le32_to_cpu(di->DataSize)); + } else { + FILE_FULL_DIRECTORY_INFO *di; + + di = (FILE_FULL_DIRECTORY_INFO *)search_info.srch_entries_start; + fi.CreationTime = di->CreationTime; + fi.LastAccessTime = di->LastAccessTime; + fi.LastWriteTime = di->LastWriteTime; + fi.ChangeTime = di->ChangeTime; + fi.Attributes = di->ExtFileAttributes; + fi.AllocationSize = di->AllocationSize; + fi.EndOfFile = di->EndOfFile; + fi.EASize = di->EaSize; + } + fi.NumberOfLinks = cpu_to_le32(1); + fi.DeletePending = 0; + fi.Directory = !!(le32_to_cpu(fi.Attributes) & ATTR_DIRECTORY); + cifs_buf_release(search_info.ntwrk_buf_start); + } else if (!full_path[0]) { + /* + * CIFSFindFirst() does not work on root path if the + * root path was exported on the server from the top + * level path (drive letter). + */ + rc = -EOPNOTSUPP; + } + } + + /* + * If everything failed then fallback to the legacy SMB command + * SMB_COM_QUERY_INFORMATION which works with all servers, but + * provide just few information. + */ + if ((rc == -EOPNOTSUPP || rc == -EINVAL) && !non_unicode_wildcard) { rc = SMBQueryInformation(xid, tcon, full_path, &fi, cifs_sb->local_nls, cifs_remap(cifs_sb)); data->adjust_tz = true; + } else if ((rc == -EOPNOTSUPP || rc == -EINVAL) && non_unicode_wildcard) { + /* Path with non-UNICODE wildcard character cannot exist. */ + rc = -ENOENT; } if (!rc) { @@ -639,6 +719,13 @@ static int cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, int rc; FILE_ALL_INFO fi = {}; + /* + * CIFSSMBQFileInfo() for non-NT servers returns bogus data in + * Attributes fields. So do not use this command for non-NT servers. + */ + if (!(tcon->ses->capabilities & CAP_NT_SMBS)) + return -EOPNOTSUPP; + if (cfile->symlink_target) { data->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL); if (!data->symlink_target) From f122121796f91168d0894c2710b8dd71330a34f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 30 Dec 2024 21:32:39 +0100 Subject: [PATCH 288/559] cifs: Fix changing times and read-only attr over SMB1 smb_set_file_info() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function CIFSSMBSetPathInfo() is not supported by non-NT servers and returns error. Fallback code via open filehandle and CIFSSMBSetFileInfo() does not work neither because CIFS_open() works also only on NT server. Therefore currently the whole smb_set_file_info() function as a SMB1 callback for the ->set_file_info() does not work with older non-NT SMB servers, like Win9x and others. This change implements fallback code in smb_set_file_info() which will works with any server and allows to change time values and also to set or clear read-only attributes. To make existing fallback code via CIFSSMBSetFileInfo() working with also non-NT servers, it is needed to change open function from CIFS_open() (which is NT specific) to cifs_open_file() which works with any server (this is just a open wrapper function which choose the correct open function supported by the server). CIFSSMBSetFileInfo() is working also on non-NT servers, but zero time values are not treated specially. So first it is needed to fill all time values if some of them are missing, via cifs_query_path_info() call. There is another issue, opening file in write-mode (needed for changing attributes) is not possible when the file has read-only attribute set. The only option how to clear read-only attribute is via SMB_COM_SETATTR command. And opening directory is not possible neither and here the SMB_COM_SETATTR command is the only option how to change attributes. And CIFSSMBSetFileInfo() does not honor setting read-only attribute, so for setting is also needed to use SMB_COM_SETATTR command. Existing code in cifs_query_path_info() is already using SMB_COM_GETATTR as a fallback code path (function SMBQueryInformation()), so introduce a new function SMBSetInformation which will implement SMB_COM_SETATTR command. My testing showed that Windows XP SMB1 client is also using SMB_COM_SETATTR command for setting or clearing read-only attribute against non-NT server. So this can prove that this is the correct way how to do it. With this change it is possible set all 4 time values and all attributes, including clearing and setting read-only bit on non-NT SMB servers. Tested against Win98 SMB1 server. This change fixes "touch" command which was failing when called on existing file. And fixes also "chmod +w" and "chmod -w" commands which were also failing (as they are changing read-only attribute). Note that this change depends on following change "cifs: Improve cifs_query_path_info() and cifs_query_file_info()" as it require to query all 4 time attribute values. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/cifspdu.h | 5 +- fs/smb/client/cifsproto.h | 4 ++ fs/smb/client/cifssmb.c | 57 +++++++++++++++++++ fs/smb/client/smb1ops.c | 112 +++++++++++++++++++++++++++++++++++--- 4 files changed, 166 insertions(+), 12 deletions(-) diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 18d67ab113f0..1b79fe07476f 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -1266,10 +1266,9 @@ typedef struct smb_com_query_information_rsp { typedef struct smb_com_setattr_req { struct smb_hdr hdr; /* wct = 8 */ __le16 attr; - __le16 time_low; - __le16 time_high; + __le32 last_write_time; __le16 reserved[5]; /* must be zero */ - __u16 ByteCount; + __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII */ unsigned char fileName[]; } __attribute__((packed)) SETATTR_REQ; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 59f6fdfe560e..ecf774a8f1ca 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -395,6 +395,10 @@ extern int CIFSSMBQFSUnixInfo(const unsigned int xid, struct cifs_tcon *tcon); extern int CIFSSMBQFSPosixInfo(const unsigned int xid, struct cifs_tcon *tcon, struct kstatfs *FSData); +extern int SMBSetInformation(const unsigned int xid, struct cifs_tcon *tcon, + const char *fileName, __le32 attributes, __le64 write_time, + const struct nls_table *nls_codepage, + struct cifs_sb_info *cifs_sb); extern int CIFSSMBSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const FILE_BASIC_INFO *data, const struct nls_table *nls_codepage, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 60cb264a01e5..f55457b4b82e 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -5171,6 +5171,63 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +int +SMBSetInformation(const unsigned int xid, struct cifs_tcon *tcon, + const char *fileName, __le32 attributes, __le64 write_time, + const struct nls_table *nls_codepage, + struct cifs_sb_info *cifs_sb) +{ + SETATTR_REQ *pSMB; + SETATTR_RSP *pSMBr; + struct timespec64 ts; + int bytes_returned; + int name_len; + int rc; + + cifs_dbg(FYI, "In %s path %s\n", __func__, fileName); + +retry: + rc = smb_init(SMB_COM_SETATTR, 8, tcon, (void **) &pSMB, + (void **) &pSMBr); + if (rc) + return rc; + + if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { + name_len = + cifsConvertToUTF16((__le16 *) pSMB->fileName, + fileName, PATH_MAX, nls_codepage, + cifs_remap(cifs_sb)); + name_len++; /* trailing null */ + name_len *= 2; + } else { + name_len = copy_path_name(pSMB->fileName, fileName); + } + /* Only few attributes can be set by this command, others are not accepted by Win9x. */ + pSMB->attr = cpu_to_le16(le32_to_cpu(attributes) & + (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM | ATTR_ARCHIVE)); + /* Zero write time value (in both NT and SETATTR formats) means to not change it. */ + if (le64_to_cpu(write_time) != 0) { + ts = cifs_NTtimeToUnix(write_time); + pSMB->last_write_time = cpu_to_le32(ts.tv_sec); + } + pSMB->BufferFormat = 0x04; + name_len++; /* account for buffer type byte */ + inc_rfc1001_len(pSMB, (__u16)name_len); + pSMB->ByteCount = cpu_to_le16(name_len); + + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + (struct smb_hdr *) pSMBr, &bytes_returned, 0); + if (rc) + cifs_dbg(FYI, "Send error in %s = %d\n", __func__, rc); + + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto retry; + + return rc; +} + /* Some legacy servers such as NT4 require that the file times be set on an open handle, rather than by pathname - this is awkward due to potential access conflicts on the open, but it is unavoidable for these diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 5c7a80bce5bd..ab26b9e9b0e7 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -896,6 +896,9 @@ smb_set_file_info(struct inode *inode, const char *full_path, struct cifs_fid fid; struct cifs_open_parms oparms; struct cifsFileInfo *open_file; + FILE_BASIC_INFO new_buf; + struct cifs_open_info_data query_data; + __le64 write_time = buf->LastWriteTime; struct cifsInodeInfo *cinode = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = NULL; @@ -903,20 +906,58 @@ smb_set_file_info(struct inode *inode, const char *full_path, /* if the file is already open for write, just use that fileid */ open_file = find_writable_file(cinode, FIND_WR_FSUID_ONLY); + if (open_file) { fid.netfid = open_file->fid.netfid; netpid = open_file->pid; tcon = tlink_tcon(open_file->tlink); - goto set_via_filehandle; + } else { + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + tlink = NULL; + goto out; + } + tcon = tlink_tcon(tlink); } - tlink = cifs_sb_tlink(cifs_sb); - if (IS_ERR(tlink)) { - rc = PTR_ERR(tlink); - tlink = NULL; - goto out; + /* + * Non-NT servers interprets zero time value in SMB_SET_FILE_BASIC_INFO + * over TRANS2_SET_FILE_INFORMATION as a valid time value. NT servers + * interprets zero time value as do not change existing value on server. + * API of ->set_file_info() callback expects that zero time value has + * the NT meaning - do not change. Therefore if server is non-NT and + * some time values in "buf" are zero, then fetch missing time values. + */ + if (!(tcon->ses->capabilities & CAP_NT_SMBS) && + (!buf->CreationTime || !buf->LastAccessTime || + !buf->LastWriteTime || !buf->ChangeTime)) { + rc = cifs_query_path_info(xid, tcon, cifs_sb, full_path, &query_data); + if (rc) { + if (open_file) { + cifsFileInfo_put(open_file); + open_file = NULL; + } + goto out; + } + /* + * Original write_time from buf->LastWriteTime is preserved + * as SMBSetInformation() interprets zero as do not change. + */ + new_buf = *buf; + buf = &new_buf; + if (!buf->CreationTime) + buf->CreationTime = query_data.fi.CreationTime; + if (!buf->LastAccessTime) + buf->LastAccessTime = query_data.fi.LastAccessTime; + if (!buf->LastWriteTime) + buf->LastWriteTime = query_data.fi.LastWriteTime; + if (!buf->ChangeTime) + buf->ChangeTime = query_data.fi.ChangeTime; } - tcon = tlink_tcon(tlink); + + if (open_file) + goto set_via_filehandle; rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, cifs_sb->local_nls, cifs_sb); @@ -937,8 +978,45 @@ smb_set_file_info(struct inode *inode, const char *full_path, .fid = &fid, }; - cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for times not supported by this server\n"); - rc = CIFS_open(xid, &oparms, &oplock, NULL); + if (S_ISDIR(inode->i_mode) && !(tcon->ses->capabilities & CAP_NT_SMBS)) { + /* Opening directory path is not possible on non-NT servers. */ + rc = -EOPNOTSUPP; + } else { + /* + * Use cifs_open_file() instead of CIFS_open() as the + * cifs_open_file() selects the correct function which + * works also on non-NT servers. + */ + rc = cifs_open_file(xid, &oparms, &oplock, NULL); + /* + * Opening path for writing on non-NT servers is not + * possible when the read-only attribute is already set. + * Non-NT server in this case returns -EACCES. For those + * servers the only possible way how to clear the read-only + * bit is via SMB_COM_SETATTR command. + */ + if (rc == -EACCES && + (cinode->cifsAttrs & ATTR_READONLY) && + le32_to_cpu(buf->Attributes) != 0 && /* 0 = do not change attrs */ + !(le32_to_cpu(buf->Attributes) & ATTR_READONLY) && + !(tcon->ses->capabilities & CAP_NT_SMBS)) + rc = -EOPNOTSUPP; + } + + /* Fallback to SMB_COM_SETATTR command when absolutelty needed. */ + if (rc == -EOPNOTSUPP) { + cifs_dbg(FYI, "calling SetInformation since SetPathInfo for attrs/times not supported by this server\n"); + rc = SMBSetInformation(xid, tcon, full_path, + buf->Attributes != 0 ? buf->Attributes : cpu_to_le32(cinode->cifsAttrs), + write_time, + cifs_sb->local_nls, cifs_sb); + if (rc == 0) + cinode->cifsAttrs = le32_to_cpu(buf->Attributes); + else + rc = -EACCES; + goto out; + } + if (rc != 0) { if (rc == -EIO) rc = -EINVAL; @@ -946,6 +1024,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, } netpid = current->tgid; + cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for attrs/times not supported by this server\n"); set_via_filehandle: rc = CIFSSMBSetFileInfo(xid, tcon, buf, fid.netfid, netpid); @@ -956,6 +1035,21 @@ set_via_filehandle: CIFSSMBClose(xid, tcon, fid.netfid); else cifsFileInfo_put(open_file); + + /* + * Setting the read-only bit is not honered on non-NT servers when done + * via open-semantics. So for setting it, use SMB_COM_SETATTR command. + * This command works only after the file is closed, so use it only when + * operation was called without the filehandle. + */ + if (open_file == NULL && + !(tcon->ses->capabilities & CAP_NT_SMBS) && + le32_to_cpu(buf->Attributes) & ATTR_READONLY) { + SMBSetInformation(xid, tcon, full_path, + buf->Attributes, + 0 /* do not change write time */, + cifs_sb->local_nls, cifs_sb); + } out: if (tlink != NULL) cifs_put_tlink(tlink); From 2feaa92c7c0123013a2a3e3d02ff8a5f5a794e96 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 23:33:06 -0400 Subject: [PATCH 289/559] bcachefs: improve missing journal write device error message Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 2a54ac79189b..63cdf885c9e2 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1782,7 +1782,7 @@ static CLOSURE_CALLBACK(journal_write_submit) struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE); if (!ca) { /* XXX: fix this */ - bch_err(c, "missing device for journal write\n"); + bch_err(c, "missing device %u for journal write", ptr->dev); continue; } From 5e63d579e752549dc256a952bcb35ade398ee921 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 29 Apr 2025 14:30:01 -0400 Subject: [PATCH 290/559] bcachefs: readdir fixes - Don't call bch2_trans_relock() after dir_emit(); taking a transaction restart here will cause us to emit the same dirent to userspace twice - Fix incorrect checking of the return value on dir_emit(): "true" means success, keep going, but bch2_dir_emit() needs to return true when we're finished iterating. https://github.com/koverstreet/bcachefs/issues/867 Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 92ee59d9e00e..8a680e52c1ed 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -685,7 +685,7 @@ static int bch2_dir_emit(struct dir_context *ctx, struct bkey_s_c_dirent d, subv vfs_d_type(d.v->d_type)); if (ret) ctx->pos = d.k->p.offset + 1; - return ret; + return !ret; } int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) @@ -710,7 +710,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) if (ret2 > 0) continue; - ret2 ?: drop_locks_do(trans, bch2_dir_emit(ctx, dirent, target)); + ret2 ?: (bch2_trans_unlock(trans), bch2_dir_emit(ctx, dirent, target)); }))); bch2_bkey_buf_exit(&sk, c); From 63f5235e0291152a2ac2c4ef3c1196cb6dfb3ef7 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Wed, 30 Apr 2025 18:18:43 +0800 Subject: [PATCH 291/559] ALSA: hda/realtek - Add more HP laptops which need mute led fixup More HP EliteBook with Realtek HDA codec ALC3247 and combined CS35L56 Amplifiers need quirk ALC236_FIXUP_HP_GPIO_LED to fix the micmute LED. Signed-off-by: Chris Chiu Cc: Link: https://patch.msgid.link/20250430101843.150833-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7810d5f9b5aa..8a2b09e4a7d5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10863,8 +10863,11 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8de8, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8de9, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8dec, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ded, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8dee, "HP EliteBook 660 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8def, "HP EliteBook 660 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8df0, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8df1, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8dfc, "HP EliteBook 645 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8dfe, "HP EliteBook 665 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e11, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2), From 650266ac4c7230c89bcd1307acf5c9c92cfa85e2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 30 Apr 2025 11:05:54 +0300 Subject: [PATCH 292/559] dm: add missing unlock on in dm_keyslot_evict() We need to call dm_put_live_table() even if dm_get_live_table() returns NULL. Fixes: 9355a9eb21a5 ("dm: support key eviction from keyslot managers of underlying devices") Cc: stable@vger.kernel.org # v5.12+ Signed-off-by: Dan Carpenter Signed-off-by: Mikulas Patocka --- drivers/md/dm-table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 9e175c5e0634..31d67a1a91dd 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1173,7 +1173,7 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, t = dm_get_live_table(md, &srcu_idx); if (!t) - return 0; + goto put_live_table; for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); @@ -1184,6 +1184,7 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, (void *)key); } +put_live_table: dm_put_live_table(md, srcu_idx); return 0; } From c44572e0cc13c9afff83fd333135a0aa9b27ba26 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 27 Apr 2025 13:34:24 +0200 Subject: [PATCH 293/559] MIPS: Fix MAX_REG_OFFSET Fix MAX_REG_OFFSET to point to the last register in 'pt_regs' and not to the marker itself, which could allow regs_get_register() to return an invalid offset. Fixes: 40e084a506eb ("MIPS: Add uprobes support.") Suggested-by: Maciej W. Rozycki Signed-off-by: Thorsten Blum Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/ptrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index 85fa9962266a..ef72c46b5568 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -65,7 +65,8 @@ static inline void instruction_pointer_set(struct pt_regs *regs, /* Query offset/name of register from its name/offset */ extern int regs_query_register_offset(const char *name); -#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last)) +#define MAX_REG_OFFSET \ + (offsetof(struct pt_regs, __last) - sizeof(unsigned long)) /** * regs_get_register() - get register value from its offset From e6a3fc4f10b872d02e25f83227e725c79b25d893 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 30 Apr 2025 14:48:37 +0200 Subject: [PATCH 294/559] genirq/msi: Prevent NULL pointer dereference in msi_domain_debug_show() irq_domain_debug_show_one() calls msi_domain_debug_show() with a non-NULL domain pointer and a NULL irq_data pointer. irq_debug_show_data() calls it with a NULL domain pointer. The domain pointer is not used, but the irq_data pointer is required to be non-NULL and lacks a NULL pointer check. Add the missing NULL pointer check to ensure there is a non-NULL irq_data pointer in msi_domain_debug_show() before dereferencing it. [ tglx: Massaged change log ] Fixes: 01499ae673dc ("genirq/msi: Expose MSI message data in debugfs") Signed-off-by: Andrew Jones Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250430124836.49964-2-ajones@ventanamicro.com --- kernel/irq/msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 5c8d43cdb0a3..c05ba7ca00fa 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -761,7 +761,7 @@ static int msi_domain_translate(struct irq_domain *domain, struct irq_fwspec *fw static void msi_domain_debug_show(struct seq_file *m, struct irq_domain *d, struct irq_data *irqd, int ind) { - struct msi_desc *desc = irq_data_get_msi_desc(irqd); + struct msi_desc *desc = irqd ? irq_data_get_msi_desc(irqd) : NULL; if (!desc) return; From edea92770a3b6454dc796fc5436a3315bb402181 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 30 Apr 2025 18:52:08 +0200 Subject: [PATCH 295/559] ASoC: stm32: sai: skip useless iterations on kernel rate loop the frequency of the kernel clock must be greater than or equal to the bitclock rate. When searching for a convenient kernel clock rate in stm32_sai_set_parent_rate() function, it is useless to continue the loop below bitclock rate, as it will result in a invalid kernel clock rate. Change the loop output condition. Fixes: 2cfe1ff22555 ("ASoC: stm32: sai: add stm32mp25 support") Signed-off-by: Olivier Moysan Link: https://patch.msgid.link/20250430165210.321273-2-olivier.moysan@foss.st.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index e8c1abf1ae0a..4d018b4bc3f0 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -409,11 +409,11 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, unsigned int rate) { struct platform_device *pdev = sai->pdev; - unsigned int sai_ck_rate, sai_ck_max_rate, sai_curr_rate, sai_new_rate; + unsigned int sai_ck_rate, sai_ck_max_rate, sai_ck_min_rate, sai_curr_rate, sai_new_rate; int div, ret; /* - * Set maximum expected kernel clock frequency + * Set minimum and maximum expected kernel clock frequency * - mclk on or spdif: * f_sai_ck = MCKDIV * mclk-fs * fs * Here typical 256 ratio is assumed for mclk-fs @@ -423,13 +423,16 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, * Set constraint MCKDIV * FRL <= 256, to ensure MCKDIV is in available range * f_sai_ck = sai_ck_max_rate * pow_of_two(FRL) / 256 */ + sai_ck_min_rate = rate * 256; if (!(rate % SAI_RATE_11K)) sai_ck_max_rate = SAI_MAX_SAMPLE_RATE_11K * 256; else sai_ck_max_rate = SAI_MAX_SAMPLE_RATE_8K * 256; - if (!sai->sai_mclk && !STM_SAI_PROTOCOL_IS_SPDIF(sai)) + if (!sai->sai_mclk && !STM_SAI_PROTOCOL_IS_SPDIF(sai)) { + sai_ck_min_rate = rate * sai->fs_length; sai_ck_max_rate /= DIV_ROUND_CLOSEST(256, roundup_pow_of_two(sai->fs_length)); + } /* * Request exclusivity, as the clock is shared by SAI sub-blocks and by @@ -472,7 +475,7 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, /* Try a lower frequency */ div++; sai_ck_rate = sai_ck_max_rate / div; - } while (sai_ck_rate > rate); + } while (sai_ck_rate >= sai_ck_min_rate); /* No accurate rate found */ dev_err(&pdev->dev, "Failed to find an accurate rate"); From cce34d113e2a592806abcdc02c7f8513775d8b20 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 30 Apr 2025 18:52:09 +0200 Subject: [PATCH 296/559] ASoC: stm32: sai: add a check on minimal kernel frequency On MP2 SoCs SAI kernel clock rate is managed through stm32_sai_set_parent_rate() function. If the kernel clock rate was set previously to a low frequency, this frequency may be too low to support the newly requested audio stream rate. However the stm32_sai_rate_accurate() will only check accuracy against the maximum kernel clock rate. The function will return leaving the kernel clock rate unchanged. Add a check on minimal frequency requirement, to avoid this. Fixes: 2cfe1ff22555 ("ASoC: stm32: sai: add stm32mp25 support") Signed-off-by: Olivier Moysan Link: https://patch.msgid.link/20250430165210.321273-3-olivier.moysan@foss.st.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 4d018b4bc3f0..bf5299ba11c3 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -447,7 +447,10 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, * return immediately. */ sai_curr_rate = clk_get_rate(sai->sai_ck); - if (stm32_sai_rate_accurate(sai_ck_max_rate, sai_curr_rate)) + dev_dbg(&pdev->dev, "kernel clock rate: min [%u], max [%u], current [%u]", + sai_ck_min_rate, sai_ck_max_rate, sai_curr_rate); + if (stm32_sai_rate_accurate(sai_ck_max_rate, sai_curr_rate) && + sai_curr_rate >= sai_ck_min_rate) return 0; /* From 02b44a2b2bdcee03cbb92484d31e9ca1b91b2a38 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 30 Apr 2025 11:31:19 +0100 Subject: [PATCH 297/559] ASoC: intel/sdw_utils: Add volume limit to cs42l43 speakers The volume control for cs42l43 speakers has a maximum gain of +31.5 dB. However, for many use cases, this can cause distorted audio, depending various factors, such as other signal-processing elements in the chain, for example if the audio passes through a gain control before reaching the codec or the signal path has been tuned for a particular maximum gain in the codec. In the case of systems which use the soc_sdw_cs42l43 driver, audio will likely be distorted in all cases above 0 dB, therefore add a volume limit of 128, which is 0 dB maximum volume inside this driver. Signed-off-by: Stefan Binding Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20250430103134.24579-2-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_cs42l43.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/sdw_utils/soc_sdw_cs42l43.c b/sound/soc/sdw_utils/soc_sdw_cs42l43.c index 668c9d28a1c1..b415d45d520d 100644 --- a/sound/soc/sdw_utils/soc_sdw_cs42l43.c +++ b/sound/soc/sdw_utils/soc_sdw_cs42l43.c @@ -20,6 +20,8 @@ #include #include +#define CS42L43_SPK_VOLUME_0DB 128 /* 0dB Max */ + static const struct snd_soc_dapm_route cs42l43_hs_map[] = { { "Headphone", NULL, "cs42l43 AMP3_OUT" }, { "Headphone", NULL, "cs42l43 AMP4_OUT" }, @@ -117,6 +119,14 @@ int asoc_sdw_cs42l43_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_so return -ENOMEM; } + ret = snd_soc_limit_volume(card, "cs42l43 Speaker Digital Volume", + CS42L43_SPK_VOLUME_0DB); + if (ret) + dev_err(card->dev, "cs42l43 speaker volume limit failed: %d\n", ret); + else + dev_info(card->dev, "Setting CS42L43 Speaker volume limit to %d\n", + CS42L43_SPK_VOLUME_0DB); + ret = snd_soc_dapm_add_routes(&card->dapm, cs42l43_spk_map, ARRAY_SIZE(cs42l43_spk_map)); if (ret) From d5463e531c128ff1b141fdba2e13345cd50028a4 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 30 Apr 2025 11:31:20 +0100 Subject: [PATCH 298/559] ASoC: intel/sdw_utils: Add volume limit to cs35l56 speakers The volume control for cs35l56 speakers has a maximum gain of +12 dB. However, for many use cases, this can cause distorted audio, depending various factors, such as other signal-processing elements in the chain, for example if the audio passes through a gain control before reaching the amp or the signal path has been tuned for a particular maximum gain in the amp. In the case of systems which use the soc_sdw_* driver, audio will likely be distorted in all cases above 0 dB, therefore add a volume limit of 400, which is 0 dB maximum volume inside this driver. The volume limit should be applied to both soundwire and soundwire bridge configurations. Signed-off-by: Stefan Binding Link: https://patch.msgid.link/20250430103134.24579-3-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 1 + sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c | 4 ++++ sound/soc/sdw_utils/soc_sdw_cs_amp.c | 24 ++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 36a4a1e1d8ca..d8bd5d37131a 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -226,6 +226,7 @@ int asoc_sdw_cs_amp_init(struct snd_soc_card *card, bool playback); int asoc_sdw_cs_spk_feedback_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs35l56_volume_limit(struct snd_soc_card *card, const char *name_prefix); /* MAXIM codec support */ int asoc_sdw_maxim_init(struct snd_soc_card *card, diff --git a/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c b/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c index 246e5c2e0af5..c7e55f443351 100644 --- a/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c +++ b/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c @@ -60,6 +60,10 @@ static int asoc_sdw_bridge_cs35l56_asp_init(struct snd_soc_pcm_runtime *rtd) /* 4 x 16-bit sample slots and FSYNC=48000, BCLK=3.072 MHz */ for_each_rtd_codec_dais(rtd, i, codec_dai) { + ret = asoc_sdw_cs35l56_volume_limit(card, codec_dai->component->name_prefix); + if (ret) + return ret; + ret = snd_soc_dai_set_tdm_slot(codec_dai, tx_mask, rx_mask, 4, 16); if (ret < 0) return ret; diff --git a/sound/soc/sdw_utils/soc_sdw_cs_amp.c b/sound/soc/sdw_utils/soc_sdw_cs_amp.c index 4b6181cf2971..35b550bcd4de 100644 --- a/sound/soc/sdw_utils/soc_sdw_cs_amp.c +++ b/sound/soc/sdw_utils/soc_sdw_cs_amp.c @@ -16,6 +16,25 @@ #define CODEC_NAME_SIZE 8 #define CS_AMP_CHANNELS_PER_AMP 4 +#define CS35L56_SPK_VOLUME_0DB 400 /* 0dB Max */ + +int asoc_sdw_cs35l56_volume_limit(struct snd_soc_card *card, const char *name_prefix) +{ + char *volume_ctl_name; + int ret; + + volume_ctl_name = kasprintf(GFP_KERNEL, "%s Speaker Volume", name_prefix); + if (!volume_ctl_name) + return -ENOMEM; + + ret = snd_soc_limit_volume(card, volume_ctl_name, CS35L56_SPK_VOLUME_0DB); + if (ret) + dev_err(card->dev, "%s limit set failed: %d\n", volume_ctl_name, ret); + + kfree(volume_ctl_name); + return ret; +} +EXPORT_SYMBOL_NS(asoc_sdw_cs35l56_volume_limit, "SND_SOC_SDW_UTILS"); int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai) { @@ -40,6 +59,11 @@ int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai snprintf(widget_name, sizeof(widget_name), "%s SPK", codec_dai->component->name_prefix); + + ret = asoc_sdw_cs35l56_volume_limit(card, codec_dai->component->name_prefix); + if (ret) + return ret; + ret = snd_soc_dapm_add_routes(&card->dapm, &route, 1); if (ret) return ret; From 3cc393d2232ec770b5f79bf0673d67702a3536c3 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 29 Apr 2025 11:49:10 +0200 Subject: [PATCH 299/559] ASoC: simple-card-utils: Fix pointer check in graph_util_parse_link_direction Actually check if the passed pointers are valid, before writing to them. This also fixes a USBAN warning: UBSAN: invalid-load in ../sound/soc/fsl/imx-card.c:687:25 load of value 255 is not a valid value for type '_Bool' This is because playback_only is uninitialized and is not written to, as the playback-only property is absent. Fixes: 844de7eebe97 ("ASoC: audio-graph-card2: expand dai_link property part") Signed-off-by: Alexander Stein Link: https://patch.msgid.link/20250429094910.1150970-1-alexander.stein@ew.tq-group.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card-utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index a1ccc300e68c..3ae2a212a2e3 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -1174,9 +1174,9 @@ void graph_util_parse_link_direction(struct device_node *np, bool is_playback_only = of_property_read_bool(np, "playback-only"); bool is_capture_only = of_property_read_bool(np, "capture-only"); - if (is_playback_only) + if (playback_only) *playback_only = is_playback_only; - if (is_capture_only) + if (capture_only) *capture_only = is_capture_only; } EXPORT_SYMBOL_GPL(graph_util_parse_link_direction); From 7f91f012c1df07af6b915d1f8cece202774bb50e Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 May 2025 01:24:43 +0530 Subject: [PATCH 300/559] ASoC: amd: ps: fix for irq handler return status If any Soundwire manager interrupt is reported, and wake interrupt is not reported, in this scenario irq_flag will be set to zero, which results in interrupt handler return status as IRQ_NONE. Add new irq flag 'wake_irq_flag' check for SoundWire wake interrupt handling to fix incorrect irq handling return status. Fixes: 3898b189079c8 ("ASoC: amd: ps: add soundwire wake interrupt handling") Signed-off-by: Vijendar Mukunda Link: https://patch.msgid.link/20250430195517.3065308-1-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/ps/pci-ps.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/amd/ps/pci-ps.c b/sound/soc/amd/ps/pci-ps.c index 8e57f31ef7f7..7936b3173632 100644 --- a/sound/soc/amd/ps/pci-ps.c +++ b/sound/soc/amd/ps/pci-ps.c @@ -193,6 +193,7 @@ static irqreturn_t acp63_irq_handler(int irq, void *dev_id) struct amd_sdw_manager *amd_manager; u32 ext_intr_stat, ext_intr_stat1; u16 irq_flag = 0; + u16 wake_irq_flag = 0; u16 sdw_dma_irq_flag = 0; adata = dev_id; @@ -231,7 +232,7 @@ static irqreturn_t acp63_irq_handler(int irq, void *dev_id) } if (adata->acp_rev >= ACP70_PCI_REV) - irq_flag = check_and_handle_acp70_sdw_wake_irq(adata); + wake_irq_flag = check_and_handle_acp70_sdw_wake_irq(adata); if (ext_intr_stat & BIT(PDM_DMA_STAT)) { ps_pdm_data = dev_get_drvdata(&adata->pdm_dev->dev); @@ -245,7 +246,7 @@ static irqreturn_t acp63_irq_handler(int irq, void *dev_id) if (sdw_dma_irq_flag) return IRQ_WAKE_THREAD; - if (irq_flag) + if (irq_flag | wake_irq_flag) return IRQ_HANDLED; else return IRQ_NONE; From 4f79eaa2ceac86a0e0f304b0bab556cca5bf4f30 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 30 Apr 2025 15:56:34 -0700 Subject: [PATCH 301/559] kbuild: Properly disable -Wunterminated-string-initialization for clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang and GCC have different behaviors around disabling warnings included in -Wall and -Wextra and the order in which flags are specified, which is exposed by clang's new support for -Wunterminated-string-initialization. $ cat test.c const char foo[3] = "FOO"; const char bar[3] __attribute__((__nonstring__)) = "BAR"; $ clang -fsyntax-only -Wextra test.c test.c:1:21: warning: initializer-string for character array is too long, array size is 3 but initializer has size 4 (including the null terminating character); did you mean to use the 'nonstring' attribute? [-Wunterminated-string-initialization] 1 | const char foo[3] = "FOO"; | ^~~~~ $ clang -fsyntax-only -Wextra -Wno-unterminated-string-initialization test.c $ clang -fsyntax-only -Wno-unterminated-string-initialization -Wextra test.c test.c:1:21: warning: initializer-string for character array is too long, array size is 3 but initializer has size 4 (including the null terminating character); did you mean to use the 'nonstring' attribute? [-Wunterminated-string-initialization] 1 | const char foo[3] = "FOO"; | ^~~~~ $ gcc -fsyntax-only -Wextra test.c test.c:1:21: warning: initializer-string for array of ‘char’ truncates NUL terminator but destination lacks ‘nonstring’ attribute (4 chars into 3 available) [-Wunterminated-string-initialization] 1 | const char foo[3] = "FOO"; | ^~~~~ $ gcc -fsyntax-only -Wextra -Wno-unterminated-string-initialization test.c $ gcc -fsyntax-only -Wno-unterminated-string-initialization -Wextra test.c Move -Wextra up right below -Wall in Makefile.extrawarn to ensure these flags are at the beginning of the warning options list. Move the couple of warning options that have been added to the main Makefile since commit e88ca24319e4 ("kbuild: consolidate warning flags in scripts/Makefile.extrawarn") to scripts/Makefile.extrawarn after -Wall / -Wextra to ensure they get properly disabled for all compilers. Fixes: 9d7a0577c9db ("gcc-15: disable '-Wunterminated-string-initialization' entirely for now") Link: https://github.com/llvm/llvm-project/issues/10359 Signed-off-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- Makefile | 7 ------- scripts/Makefile.extrawarn | 9 ++++++++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 5aa9ee52a765..94be5dfb81fb 100644 --- a/Makefile +++ b/Makefile @@ -1052,13 +1052,6 @@ NOSTDINC_FLAGS += -nostdinc # perform bounds checking. KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) -#Currently, disable -Wstringop-overflow for GCC 11, globally. -KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-disable-warning, stringop-overflow) -KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) - -#Currently, disable -Wunterminated-string-initialization as broken -KBUILD_CFLAGS += $(call cc-disable-warning, unterminated-string-initialization) - # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 2d6e59561c9d..d88acdf40855 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -8,6 +8,7 @@ # Default set of warnings, always enabled KBUILD_CFLAGS += -Wall +KBUILD_CFLAGS += -Wextra KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -Werror=implicit-function-declaration KBUILD_CFLAGS += -Werror=implicit-int @@ -56,6 +57,13 @@ KBUILD_CFLAGS += -Wno-pointer-sign # globally built with -Wcast-function-type. KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type) +# Currently, disable -Wstringop-overflow for GCC 11, globally. +KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-disable-warning, stringop-overflow) +KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) + +# Currently, disable -Wunterminated-string-initialization as broken +KBUILD_CFLAGS += $(call cc-disable-warning, unterminated-string-initialization) + # The allocators already balk at large sizes, so silence the compiler # warnings for bounds checks involving those possible values. While # -Wno-alloc-size-larger-than would normally be used here, earlier versions @@ -82,7 +90,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # Warn if there is an enum types mismatch KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion) -KBUILD_CFLAGS += -Wextra KBUILD_CFLAGS += -Wunused # From 05450c48a35810c5025cffb41dbf566cfb122415 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 30 Apr 2025 23:18:49 -0400 Subject: [PATCH 302/559] bcachefs: Kill ERO in __bch2_i_sectors_acct() We won't be root causing this in the immediate future, and it's fairly innocuous - so just log it in the superblock. https://github.com/koverstreet/bcachefs/issues/869 Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 23 +++++++++++++++++++---- fs/bcachefs/sb-errors_format.h | 5 ++++- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 65c2c33d253d..28619b6bd327 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -144,10 +144,25 @@ int __must_check bch2_write_inode_size(struct bch_fs *c, void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, struct quota_res *quota_res, s64 sectors) { - bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c, - "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", - inode->v.i_ino, (u64) inode->v.i_blocks, sectors, - inode->ei_inode.bi_sectors); + if (unlikely((s64) inode->v.i_blocks + sectors < 0)) { + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", + inode->v.i_ino, (u64) inode->v.i_blocks, sectors, + inode->ei_inode.bi_sectors); + + bool repeat = false, print = false, suppress = false; + bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, buf.buf, &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + + if (sectors < 0) + sectors = -inode->v.i_blocks; + else + sectors = 0; + } + inode->v.i_blocks += sectors; #ifdef CONFIG_BCACHEFS_QUOTA diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index a4ad5924107b..582e77cbaf8f 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -236,6 +236,7 @@ enum bch_fsck_flags { x(inode_has_child_snapshots_wrong, 287, 0) \ x(inode_unreachable, 210, FSCK_AUTOFIX) \ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ + x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \ @@ -317,7 +318,9 @@ enum bch_fsck_flags { x(directory_size_mismatch, 303, FSCK_AUTOFIX) \ x(dirent_cf_name_too_big, 304, 0) \ x(dirent_stray_data_after_cf_name, 305, 0) \ - x(MAX, 308, 0) + x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ + x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ + x(MAX, 312, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From 3a72e369412d6bf7f6a8af410ac4bdd7d48deb62 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 30 Apr 2025 23:56:00 -0400 Subject: [PATCH 303/559] bcachefs: check for inode.bi_sectors underflow Signed-off-by: Kent Overstreet --- fs/bcachefs/io_write.c | 21 +++++++++++++++++++++ fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index a418fa62f09d..c1237da079ed 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -255,6 +255,27 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, } if (i_sectors_delta) { + s64 bi_sectors = le64_to_cpu(inode->v.bi_sectors); + if (unlikely(bi_sectors + i_sectors_delta < 0)) { + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0", + extent_iter->pos.inode, bi_sectors, i_sectors_delta); + + bool repeat = false, print = false, suppress = false; + bch2_count_fsck_err(c, inode_i_sectors_underflow, buf.buf, + &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + + if (i_sectors_delta < 0) + i_sectors_delta = -bi_sectors; + else + i_sectors_delta = 0; + } + le64_add_cpu(&inode->v.bi_sectors, i_sectors_delta); inode_update_flags = 0; } diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 582e77cbaf8f..822a0b42432d 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -236,6 +236,7 @@ enum bch_fsck_flags { x(inode_has_child_snapshots_wrong, 287, 0) \ x(inode_unreachable, 210, FSCK_AUTOFIX) \ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ + x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \ x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ @@ -320,7 +321,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 312, 0) + x(MAX, 313, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From e660d7ca7488214ae19d964d196f89b4237ec829 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 1 May 2025 00:01:29 -0400 Subject: [PATCH 304/559] bcachefs: Kill ERO for i_blocks check in truncate Replace with logging the error in the superblock. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 21 ++++++++++++++++----- fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 28619b6bd327..9657144666b8 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -517,11 +517,22 @@ int bchfs_truncate(struct mnt_idmap *idmap, goto err; } - bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && - !bch2_journal_error(&c->journal), c, - "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", - inode->v.i_ino, (u64) inode->v.i_blocks, - inode->ei_inode.bi_sectors); + if (unlikely(!inode->v.i_size && inode->v.i_blocks && + !bch2_journal_error(&c->journal))) { + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, + "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", + inode->v.i_ino, (u64) inode->v.i_blocks, + inode->ei_inode.bi_sectors); + + bool repeat = false, print = false, suppress = false; + bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, buf.buf, + &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + } ret = bch2_setattr_nonsize(idmap, inode, iattr); err: diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 822a0b42432d..3b69a924086f 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -238,6 +238,7 @@ enum bch_fsck_flags { x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \ x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \ + x(vfs_inode_i_blocks_not_zero_at_truncate, 313, FSCK_AUTOFIX) \ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \ @@ -321,7 +322,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 313, 0) + x(MAX, 314, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From 87ec7d5249bb8ebf40261420da069fa238c21789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 3 Apr 2025 13:25:24 +0200 Subject: [PATCH 305/559] KVM: RISC-V: reset smstateen CSRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not resetting smstateen is a potential security hole, because VU might be able to access state that VS does not properly context-switch. Fixes: 81f0f314fec9 ("RISCV: KVM: Add sstateen0 context save/restore") Signed-off-by: Radim Krčmář Link: https://lore.kernel.org/r/20250403112522.1566629-8-rkrcmar@ventanamicro.com Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 60d684c76c58..02635bac91f1 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -77,6 +77,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) memcpy(cntx, reset_cntx, sizeof(*cntx)); spin_unlock(&vcpu->arch.reset_cntx_lock); + memset(&vcpu->arch.smstateen_csr, 0, sizeof(vcpu->arch.smstateen_csr)); + kvm_riscv_vcpu_fp_reset(vcpu); kvm_riscv_vcpu_vector_reset(vcpu); From c59f7c9661b9d3ee33a21d7b4f1dd4b77079e3e7 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 30 Apr 2025 20:15:48 -0300 Subject: [PATCH 306/559] smb: client: ensure aligned IO sizes Make all IO sizes multiple of PAGE_SIZE, either negotiated by the server or passed through rsize, wsize and bsize mount options, to prevent from breaking DIO reads and writes against servers that enforce alignment as specified in MS-FSA 2.1.5.3 and 2.1.5.4. Cc: linux-cifs@vger.kernel.org Reviewed-by: David Howells Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/connect.c | 23 +------------------ fs/smb/client/file.c | 6 ++--- fs/smb/client/fs_context.c | 25 +++++--------------- fs/smb/client/fs_context.h | 47 ++++++++++++++++++++++++++++++++++++++ fs/smb/client/smb1ops.c | 8 +++---- fs/smb/client/smb2pdu.c | 8 ++----- 6 files changed, 62 insertions(+), 55 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index df976ce6aed9..6bf04d9a5491 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3753,28 +3753,7 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) } } - /* - * Clamp the rsize/wsize mount arguments if they are too big for the server - * and set the rsize/wsize to the negotiated values if not passed in by - * the user on mount - */ - if ((cifs_sb->ctx->wsize == 0) || - (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) { - cifs_sb->ctx->wsize = - round_down(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE); - /* - * in the very unlikely event that the server sent a max write size under PAGE_SIZE, - * (which would get rounded down to 0) then reset wsize to absolute minimum eg 4096 - */ - if (cifs_sb->ctx->wsize == 0) { - cifs_sb->ctx->wsize = PAGE_SIZE; - cifs_dbg(VFS, "wsize too small, reset to minimum ie PAGE_SIZE, usually 4096\n"); - } - } - if ((cifs_sb->ctx->rsize == 0) || - (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx))) - cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx); - + cifs_negotiate_iosize(server, cifs_sb->ctx, tcon); /* * The cookie is initialized from volume info returned above. * Inside cifs_fscache_get_super_cookie it checks diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 9e8f404b9e56..851b74f557c1 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -160,10 +160,8 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); rdata->server = server; - if (cifs_sb->ctx->rsize == 0) - cifs_sb->ctx->rsize = - server->ops->negotiate_rsize(tlink_tcon(req->cfile->tlink), - cifs_sb->ctx); + cifs_negotiate_rsize(server, cifs_sb->ctx, + tlink_tcon(req->cfile->tlink)); rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &size, &rdata->credits); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 2980941b9667..a634a34d4086 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1021,6 +1021,7 @@ static int smb3_reconfigure(struct fs_context *fc) struct dentry *root = fc->root; struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb); struct cifs_ses *ses = cifs_sb_master_tcon(cifs_sb)->ses; + unsigned int rsize = ctx->rsize, wsize = ctx->wsize; char *new_password = NULL, *new_password2 = NULL; bool need_recon = false; int rc; @@ -1103,11 +1104,8 @@ static int smb3_reconfigure(struct fs_context *fc) STEAL_STRING(cifs_sb, ctx, iocharset); /* if rsize or wsize not passed in on remount, use previous values */ - if (ctx->rsize == 0) - ctx->rsize = cifs_sb->ctx->rsize; - if (ctx->wsize == 0) - ctx->wsize = cifs_sb->ctx->wsize; - + ctx->rsize = rsize ? CIFS_ALIGN_RSIZE(fc, rsize) : cifs_sb->ctx->rsize; + ctx->wsize = wsize ? CIFS_ALIGN_WSIZE(fc, wsize) : cifs_sb->ctx->wsize; smb3_cleanup_fs_context_contents(cifs_sb->ctx); rc = smb3_fs_context_dup(cifs_sb->ctx, ctx); @@ -1312,7 +1310,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, __func__); goto cifs_parse_mount_err; } - ctx->bsize = result.uint_32; + ctx->bsize = CIFS_ALIGN_BSIZE(fc, result.uint_32); ctx->got_bsize = true; break; case Opt_rasize: @@ -1336,24 +1334,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->rasize = result.uint_32; break; case Opt_rsize: - ctx->rsize = result.uint_32; + ctx->rsize = CIFS_ALIGN_RSIZE(fc, result.uint_32); ctx->got_rsize = true; ctx->vol_rsize = ctx->rsize; break; case Opt_wsize: - ctx->wsize = result.uint_32; + ctx->wsize = CIFS_ALIGN_WSIZE(fc, result.uint_32); ctx->got_wsize = true; - if (ctx->wsize % PAGE_SIZE != 0) { - ctx->wsize = round_down(ctx->wsize, PAGE_SIZE); - if (ctx->wsize == 0) { - ctx->wsize = PAGE_SIZE; - cifs_dbg(VFS, "wsize too small, reset to minimum %ld\n", PAGE_SIZE); - } else { - cifs_dbg(VFS, - "wsize rounded down to %d to multiple of PAGE_SIZE %ld\n", - ctx->wsize, PAGE_SIZE); - } - } ctx->vol_wsize = ctx->wsize; break; case Opt_acregmax: diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index d1d29249bcdb..9e83302ce4b8 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -20,6 +20,21 @@ cifs_dbg(VFS, fmt, ## __VA_ARGS__); \ } while (0) +static inline size_t cifs_io_align(struct fs_context *fc, + const char *name, size_t size) +{ + if (!size || !IS_ALIGNED(size, PAGE_SIZE)) { + cifs_errorf(fc, "unaligned %s, making it a multiple of %lu bytes\n", + name, PAGE_SIZE); + size = umax(round_down(size, PAGE_SIZE), PAGE_SIZE); + } + return size; +} + +#define CIFS_ALIGN_WSIZE(_fc, _size) cifs_io_align(_fc, "wsize", _size) +#define CIFS_ALIGN_RSIZE(_fc, _size) cifs_io_align(_fc, "rsize", _size) +#define CIFS_ALIGN_BSIZE(_fc, _size) cifs_io_align(_fc, "bsize", _size) + enum smb_version { Smb_1 = 1, Smb_20, @@ -361,4 +376,36 @@ static inline void cifs_mount_unlock(void) mutex_unlock(&cifs_mount_mutex); } +static inline void cifs_negotiate_rsize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + unsigned int size; + + size = umax(server->ops->negotiate_rsize(tcon, ctx), PAGE_SIZE); + if (ctx->rsize) + size = umax(umin(ctx->rsize, size), PAGE_SIZE); + ctx->rsize = round_down(size, PAGE_SIZE); +} + +static inline void cifs_negotiate_wsize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + unsigned int size; + + size = umax(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE); + if (ctx->wsize) + size = umax(umin(ctx->wsize, size), PAGE_SIZE); + ctx->wsize = round_down(size, PAGE_SIZE); +} + +static inline void cifs_negotiate_iosize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + cifs_negotiate_rsize(server, ctx, tcon); + cifs_negotiate_wsize(server, ctx, tcon); +} + #endif diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index ab26b9e9b0e7..b27a182629ec 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -432,7 +432,7 @@ cifs_negotiate(const unsigned int xid, } static unsigned int -cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) +smb1_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; @@ -467,7 +467,7 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) } static unsigned int -cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) +smb1_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; @@ -1342,8 +1342,8 @@ struct smb_version_operations smb1_operations = { .check_trans2 = cifs_check_trans2, .need_neg = cifs_need_neg, .negotiate = cifs_negotiate, - .negotiate_wsize = cifs_negotiate_wsize, - .negotiate_rsize = cifs_negotiate_rsize, + .negotiate_wsize = smb1_negotiate_wsize, + .negotiate_rsize = smb1_negotiate_rsize, .sess_setup = CIFS_SessSetup, .logoff = CIFSSMBLogoff, .tree_connect = CIFSTCon, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index a9a49555d43b..0b35816d551f 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4093,12 +4093,8 @@ static void cifs_renegotiate_iosize(struct TCP_Server_Info *server, return; spin_lock(&tcon->sb_list_lock); - list_for_each_entry(cifs_sb, &tcon->cifs_sb_list, tcon_sb_link) { - cifs_sb->ctx->rsize = - server->ops->negotiate_rsize(tcon, cifs_sb->ctx); - cifs_sb->ctx->wsize = - server->ops->negotiate_wsize(tcon, cifs_sb->ctx); - } + list_for_each_entry(cifs_sb, &tcon->cifs_sb_list, tcon_sb_link) + cifs_negotiate_iosize(server, cifs_sb->ctx, tcon); spin_unlock(&tcon->sb_list_lock); } From 927069d5c40c1cfa7b2d13cfc6d7d58bc6f85c50 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Wed, 30 Apr 2025 10:03:43 -0700 Subject: [PATCH 307/559] bnxt_en: fix module unload sequence Recent updates to the PTP part of bnxt changed the way PTP FIFO is cleared, skbs waiting for TX timestamps are now cleared during ndo_close() call. To do clearing procedure, the ptp structure must exist and point to a valid address. Module destroy sequence had ptp clear code running before netdev close causing invalid memory access and kernel crash. Change the sequence to destroy ptp structure after device close. Fixes: 8f7ae5a85137 ("bnxt_en: improve TX timestamping FIFO configuration") Reported-by: Taehee Yoo Closes: https://lore.kernel.org/netdev/CAMArcTWDe2cd41=ub=zzvYifaYcYv-N-csxfqxUvejy_L0D6UQ@mail.gmail.com/ Signed-off-by: Vadim Fedorenko Reviewed-by: Jacob Keller Reviewed-by: Michael Chan Tested-by: Taehee Yoo Link: https://patch.msgid.link/20250430170343.759126-1-vadfed@meta.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 78e496b0ec26..86a5de44b6f3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -16006,8 +16006,8 @@ static void bnxt_remove_one(struct pci_dev *pdev) bnxt_rdma_aux_device_del(bp); - bnxt_ptp_clear(bp); unregister_netdev(dev); + bnxt_ptp_clear(bp); bnxt_rdma_aux_device_uninit(bp); From f920436a44295ca791ebb6dae3f4190142eec703 Mon Sep 17 00:00:00 2001 From: Jibin Zhang Date: Tue, 29 Apr 2025 09:59:48 +0800 Subject: [PATCH 308/559] net: use sock_gen_put() when sk_state is TCP_TIME_WAIT It is possible for a pointer of type struct inet_timewait_sock to be returned from the functions __inet_lookup_established() and __inet6_lookup_established(). This can cause a crash when the returned pointer is of type struct inet_timewait_sock and sock_put() is called on it. The following is a crash call stack that shows sk->sk_wmem_alloc being accessed in sk_free() during the call to sock_put() on a struct inet_timewait_sock pointer. To avoid this issue, use sock_gen_put() instead of sock_put() when sk->sk_state is TCP_TIME_WAIT. mrdump.ko ipanic() + 120 vmlinux notifier_call_chain(nr_to_call=-1, nr_calls=0) + 132 vmlinux atomic_notifier_call_chain(val=0) + 56 vmlinux panic() + 344 vmlinux add_taint() + 164 vmlinux end_report() + 136 vmlinux kasan_report(size=0) + 236 vmlinux report_tag_fault() + 16 vmlinux do_tag_recovery() + 16 vmlinux __do_kernel_fault() + 88 vmlinux do_bad_area() + 28 vmlinux do_tag_check_fault() + 60 vmlinux do_mem_abort() + 80 vmlinux el1_abort() + 56 vmlinux el1h_64_sync_handler() + 124 vmlinux > 0xFFFFFFC080011294() vmlinux __lse_atomic_fetch_add_release(v=0xF2FFFF82A896087C) vmlinux __lse_atomic_fetch_sub_release(v=0xF2FFFF82A896087C) vmlinux arch_atomic_fetch_sub_release(i=1, v=0xF2FFFF82A896087C) + 8 vmlinux raw_atomic_fetch_sub_release(i=1, v=0xF2FFFF82A896087C) + 8 vmlinux atomic_fetch_sub_release(i=1, v=0xF2FFFF82A896087C) + 8 vmlinux __refcount_sub_and_test(i=1, r=0xF2FFFF82A896087C, oldp=0) + 8 vmlinux __refcount_dec_and_test(r=0xF2FFFF82A896087C, oldp=0) + 8 vmlinux refcount_dec_and_test(r=0xF2FFFF82A896087C) + 8 vmlinux sk_free(sk=0xF2FFFF82A8960700) + 28 vmlinux sock_put() + 48 vmlinux tcp6_check_fraglist_gro() + 236 vmlinux tcp6_gro_receive() + 624 vmlinux ipv6_gro_receive() + 912 vmlinux dev_gro_receive() + 1116 vmlinux napi_gro_receive() + 196 ccmni.ko ccmni_rx_callback() + 208 ccmni.ko ccmni_queue_recv_skb() + 388 ccci_dpmaif.ko dpmaif_rxq_push_thread() + 1088 vmlinux kthread() + 268 vmlinux 0xFFFFFFC08001F30C() Fixes: c9d1d23e5239 ("net: add heuristic for enabling TCP fraglist GRO") Signed-off-by: Jibin Zhang Signed-off-by: Shiming Cheng Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250429020412.14163-1-shiming.cheng@mediatek.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_offload.c | 2 +- net/ipv6/tcpv6_offload.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 934f777f29d3..d293087b426d 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -439,7 +439,7 @@ static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, iif, sdif); NAPI_GRO_CB(skb)->is_flist = !sk; if (sk) - sock_put(sk); + sock_gen_put(sk); } INDIRECT_CALLABLE_SCOPE diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index d9b11fe41bf0..a8a04f441e78 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -42,7 +42,7 @@ static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, iif, sdif); NAPI_GRO_CB(skb)->is_flist = !sk; if (sk) - sock_put(sk); + sock_gen_put(sk); #endif /* IS_ENABLED(CONFIG_IPV6) */ } From e98386d79a23c57cf179fe4138322e277aa3aa74 Mon Sep 17 00:00:00 2001 From: Sagi Maimon Date: Tue, 29 Apr 2025 10:33:20 +0300 Subject: [PATCH 309/559] ptp: ocp: Fix NULL dereference in Adva board SMA sysfs operations On Adva boards, SMA sysfs store/get operations can call __handle_signal_outputs() or __handle_signal_inputs() while the `irig` and `dcf` pointers are uninitialized, leading to a NULL pointer dereference in __handle_signal() and causing a kernel crash. Adva boards don't use `irig` or `dcf` functionality, so add Adva-specific callbacks `ptp_ocp_sma_adva_set_outputs()` and `ptp_ocp_sma_adva_set_inputs()` that avoid invoking `irig` or `dcf` input/output routines. Fixes: ef61f5528fca ("ptp: ocp: add Adva timecard support") Signed-off-by: Sagi Maimon Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250429073320.33277-1-maimon.sagi@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 52 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index faf6e027f89a..2ccdca4f6960 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -2578,12 +2578,60 @@ static const struct ocp_sma_op ocp_fb_sma_op = { .set_output = ptp_ocp_sma_fb_set_output, }; +static int +ptp_ocp_sma_adva_set_output(struct ptp_ocp *bp, int sma_nr, u32 val) +{ + u32 reg, mask, shift; + unsigned long flags; + u32 __iomem *gpio; + + gpio = sma_nr > 2 ? &bp->sma_map1->gpio2 : &bp->sma_map2->gpio2; + shift = sma_nr & 1 ? 0 : 16; + + mask = 0xffff << (16 - shift); + + spin_lock_irqsave(&bp->lock, flags); + + reg = ioread32(gpio); + reg = (reg & mask) | (val << shift); + + iowrite32(reg, gpio); + + spin_unlock_irqrestore(&bp->lock, flags); + + return 0; +} + +static int +ptp_ocp_sma_adva_set_inputs(struct ptp_ocp *bp, int sma_nr, u32 val) +{ + u32 reg, mask, shift; + unsigned long flags; + u32 __iomem *gpio; + + gpio = sma_nr > 2 ? &bp->sma_map2->gpio1 : &bp->sma_map1->gpio1; + shift = sma_nr & 1 ? 0 : 16; + + mask = 0xffff << (16 - shift); + + spin_lock_irqsave(&bp->lock, flags); + + reg = ioread32(gpio); + reg = (reg & mask) | (val << shift); + + iowrite32(reg, gpio); + + spin_unlock_irqrestore(&bp->lock, flags); + + return 0; +} + static const struct ocp_sma_op ocp_adva_sma_op = { .tbl = { ptp_ocp_adva_sma_in, ptp_ocp_adva_sma_out }, .init = ptp_ocp_sma_fb_init, .get = ptp_ocp_sma_fb_get, - .set_inputs = ptp_ocp_sma_fb_set_inputs, - .set_output = ptp_ocp_sma_fb_set_output, + .set_inputs = ptp_ocp_sma_adva_set_inputs, + .set_output = ptp_ocp_sma_adva_set_output, }; static int From 2d52e2e38b85c8b7bc00dca55c2499f46f8c8198 Mon Sep 17 00:00:00 2001 From: Thangaraj Samynathan Date: Tue, 29 Apr 2025 10:55:27 +0530 Subject: [PATCH 310/559] net: lan743x: Fix memleak issue when GSO enabled Always map the `skb` to the LS descriptor. Previously skb was mapped to EXT descriptor when the number of fragments is zero with GSO enabled. Mapping the skb to EXT descriptor prevents it from being freed, leading to a memory leak Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Thangaraj Samynathan Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250429052527.10031-1-thangaraj.s@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 8 ++++++-- drivers/net/ethernet/microchip/lan743x_main.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 23760b613d3e..e2d6bfb5d693 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1815,6 +1815,7 @@ static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx, if (nr_frags <= 0) { tx->frame_data0 |= TX_DESC_DATA0_LS_; tx->frame_data0 |= TX_DESC_DATA0_IOC_; + tx->frame_last = tx->frame_first; } tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); @@ -1884,6 +1885,7 @@ static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx, tx->frame_first = 0; tx->frame_data0 = 0; tx->frame_tail = 0; + tx->frame_last = 0; return -ENOMEM; } @@ -1924,16 +1926,18 @@ static void lan743x_tx_frame_end(struct lan743x_tx *tx, TX_DESC_DATA0_DTYPE_DATA_) { tx->frame_data0 |= TX_DESC_DATA0_LS_; tx->frame_data0 |= TX_DESC_DATA0_IOC_; + tx->frame_last = tx->frame_tail; } - tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; - buffer_info = &tx->buffer_info[tx->frame_tail]; + tx_descriptor = &tx->ring_cpu_ptr[tx->frame_last]; + buffer_info = &tx->buffer_info[tx->frame_last]; buffer_info->skb = skb; if (time_stamp) buffer_info->flags |= TX_BUFFER_INFO_FLAG_TIMESTAMP_REQUESTED; if (ignore_sync) buffer_info->flags |= TX_BUFFER_INFO_FLAG_IGNORE_SYNC; + tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail); tx->last_tail = tx->frame_tail; diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 7f73d66854be..db5fc73e41cc 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -980,6 +980,7 @@ struct lan743x_tx { u32 frame_first; u32 frame_data0; u32 frame_tail; + u32 frame_last; struct lan743x_tx_buffer_info *buffer_info; From a179aad12badc43201cbf45d1e8ed2c1383c76b9 Mon Sep 17 00:00:00 2001 From: Mattias Barthel Date: Tue, 29 Apr 2025 11:08:26 +0200 Subject: [PATCH 311/559] net: fec: ERR007885 Workaround for conventional TX Activate TX hang workaround also in fec_enet_txq_submit_skb() when TSO is not enabled. Errata: ERR007885 Symptoms: NETDEV WATCHDOG: eth0 (fec): transmit queue 0 timed out commit 37d6017b84f7 ("net: fec: Workaround for imx6sx enet tx hang when enable three queues") There is a TDAR race condition for mutliQ when the software sets TDAR and the UDMA clears TDAR simultaneously or in a small window (2-4 cycles). This will cause the udma_tx and udma_tx_arbiter state machines to hang. So, the Workaround is checking TDAR status four time, if TDAR cleared by hardware and then write TDAR, otherwise don't set TDAR. Fixes: 53bb20d1faba ("net: fec: add variable reg_desc_active to speed things up") Signed-off-by: Mattias Barthel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250429090826.3101258-1-mattiasbarthel@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index a86cfebedaa8..17e9bddb9ddd 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -714,7 +714,12 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, txq->bd.cur = bdp; /* Trigger transmission start */ - writel(0, txq->bd.reg_desc_active); + if (!(fep->quirks & FEC_QUIRK_ERR007885) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active)) + writel(0, txq->bd.reg_desc_active); return 0; } From 34f42736b325287a7b2ce37e415838f539767bda Mon Sep 17 00:00:00 2001 From: Sathesh B Edara Date: Tue, 29 Apr 2025 04:46:24 -0700 Subject: [PATCH 312/559] octeon_ep: Fix host hang issue during device reboot When the host loses heartbeat messages from the device, the driver calls the device-specific ndo_stop function, which frees the resources. If the driver is unloaded in this scenario, it calls ndo_stop again, attempting to free resources that have already been freed, leading to a host hang issue. To resolve this, dev_close should be called instead of the device-specific stop function.dev_close internally calls ndo_stop to stop the network interface and performs additional cleanup tasks. During the driver unload process, if the device is already down, ndo_stop is not called. Fixes: 5cb96c29aa0e ("octeon_ep: add heartbeat monitor") Signed-off-by: Sathesh B Edara Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250429114624.19104-1-sedara@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 0a679e95196f..24499bb36c00 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1223,7 +1223,7 @@ static void octep_hb_timeout_task(struct work_struct *work) miss_cnt); rtnl_lock(); if (netif_running(oct->netdev)) - octep_stop(oct->netdev); + dev_close(oct->netdev); rtnl_unlock(); } From ef2383d078edcbe3055032436b16cdf206f26de2 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 30 Apr 2025 17:30:49 +0800 Subject: [PATCH 313/559] net: hns3: store rx VLAN tag offload state for VF The VF driver missed to store the rx VLAN tag strip state when user change the rx VLAN tag offload state. And it will default to enable the rx vlan tag strip when re-init VF device after reset. So if user disable rx VLAN tag offload, and trig reset, then the HW will still strip the VLAN tag from packet nad fill into RX BD, but the VF driver will ignore it for rx VLAN tag offload disabled. It may cause the rx VLAN tag dropped. Fixes: b2641e2ad456 ("net: hns3: Add support of hardware rx-vlan-offload to HNS3 VF driver") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250430093052.2400464-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../hisilicon/hns3/hns3vf/hclgevf_main.c | 25 ++++++++++++++----- .../hisilicon/hns3/hns3vf/hclgevf_main.h | 1 + 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 9ba767740a04..dada42e7e0ec 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1292,9 +1292,8 @@ static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev) rtnl_unlock(); } -static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) +static int hclgevf_en_hw_strip_rxvtag_cmd(struct hclgevf_dev *hdev, bool enable) { - struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); struct hclge_vf_to_pf_msg send_msg; hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN, @@ -1303,6 +1302,19 @@ static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); } +static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) +{ + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + int ret; + + ret = hclgevf_en_hw_strip_rxvtag_cmd(hdev, enable); + if (ret) + return ret; + + hdev->rxvtag_strip_en = enable; + return 0; +} + static int hclgevf_reset_tqp(struct hnae3_handle *handle) { #define HCLGEVF_RESET_ALL_QUEUE_DONE 1U @@ -2204,12 +2216,13 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) tc_valid, tc_size); } -static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) +static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev, + bool rxvtag_strip_en) { struct hnae3_handle *nic = &hdev->nic; int ret; - ret = hclgevf_en_hw_strip_rxvtag(nic, true); + ret = hclgevf_en_hw_strip_rxvtag(nic, rxvtag_strip_en); if (ret) { dev_err(&hdev->pdev->dev, "failed to enable rx vlan offload, ret = %d\n", ret); @@ -2879,7 +2892,7 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev) if (ret) return ret; - ret = hclgevf_init_vlan_config(hdev); + ret = hclgevf_init_vlan_config(hdev, hdev->rxvtag_strip_en); if (ret) { dev_err(&hdev->pdev->dev, "failed(%d) to initialize VLAN config\n", ret); @@ -2994,7 +3007,7 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) goto err_config; } - ret = hclgevf_init_vlan_config(hdev); + ret = hclgevf_init_vlan_config(hdev, true); if (ret) { dev_err(&hdev->pdev->dev, "failed(%d) to initialize VLAN config\n", ret); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index cccef3228461..0208425ab594 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -253,6 +253,7 @@ struct hclgevf_dev { int *vector_irq; bool gro_en; + bool rxvtag_strip_en; unsigned long vlan_del_fail_bmap[BITS_TO_LONGS(VLAN_N_VID)]; From 8e6b9c6ea5a55045eed6526d8ee49e93192d1a58 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Wed, 30 Apr 2025 17:30:50 +0800 Subject: [PATCH 314/559] net: hns3: fix an interrupt residual problem When a VF is passthrough to a VM, and the VM is killed, the reported interrupt may not been handled, it will remain, and won't be clear by the nic engine even with a flr or tqp reset. When the VM restart, the interrupt of the first vector may be dropped by the second enable_irq in vfio, see the issue below: https://gitlab.com/qemu-project/qemu/-/issues/2884#note_2423361621 We notice that the vfio has always behaved this way, and the interrupt is a residue of the nic engine, so we fix the problem by moving the vector enable process out of the enable_irq loop. Fixes: 08a100689d4b ("net: hns3: re-organize vector handle") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20250430093052.2400464-3-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/hisilicon/hns3/hns3_enet.c | 82 +++++++++---------- 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 9ff797fb36c4..b03b8758c777 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -473,20 +473,14 @@ static void hns3_mask_vector_irq(struct hns3_enet_tqp_vector *tqp_vector, writel(mask_en, tqp_vector->mask_addr); } -static void hns3_vector_enable(struct hns3_enet_tqp_vector *tqp_vector) +static void hns3_irq_enable(struct hns3_enet_tqp_vector *tqp_vector) { napi_enable(&tqp_vector->napi); enable_irq(tqp_vector->vector_irq); - - /* enable vector */ - hns3_mask_vector_irq(tqp_vector, 1); } -static void hns3_vector_disable(struct hns3_enet_tqp_vector *tqp_vector) +static void hns3_irq_disable(struct hns3_enet_tqp_vector *tqp_vector) { - /* disable vector */ - hns3_mask_vector_irq(tqp_vector, 0); - disable_irq(tqp_vector->vector_irq); napi_disable(&tqp_vector->napi); cancel_work_sync(&tqp_vector->rx_group.dim.work); @@ -707,11 +701,42 @@ static int hns3_set_rx_cpu_rmap(struct net_device *netdev) return 0; } +static void hns3_enable_irqs_and_tqps(struct net_device *netdev) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hnae3_handle *h = priv->ae_handle; + u16 i; + + for (i = 0; i < priv->vector_num; i++) + hns3_irq_enable(&priv->tqp_vector[i]); + + for (i = 0; i < priv->vector_num; i++) + hns3_mask_vector_irq(&priv->tqp_vector[i], 1); + + for (i = 0; i < h->kinfo.num_tqps; i++) + hns3_tqp_enable(h->kinfo.tqp[i]); +} + +static void hns3_disable_irqs_and_tqps(struct net_device *netdev) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hnae3_handle *h = priv->ae_handle; + u16 i; + + for (i = 0; i < h->kinfo.num_tqps; i++) + hns3_tqp_disable(h->kinfo.tqp[i]); + + for (i = 0; i < priv->vector_num; i++) + hns3_mask_vector_irq(&priv->tqp_vector[i], 0); + + for (i = 0; i < priv->vector_num; i++) + hns3_irq_disable(&priv->tqp_vector[i]); +} + static int hns3_nic_net_up(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); struct hnae3_handle *h = priv->ae_handle; - int i, j; int ret; ret = hns3_nic_reset_all_ring(h); @@ -720,23 +745,13 @@ static int hns3_nic_net_up(struct net_device *netdev) clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); - /* enable the vectors */ - for (i = 0; i < priv->vector_num; i++) - hns3_vector_enable(&priv->tqp_vector[i]); - - /* enable rcb */ - for (j = 0; j < h->kinfo.num_tqps; j++) - hns3_tqp_enable(h->kinfo.tqp[j]); + hns3_enable_irqs_and_tqps(netdev); /* start the ae_dev */ ret = h->ae_algo->ops->start ? h->ae_algo->ops->start(h) : 0; if (ret) { set_bit(HNS3_NIC_STATE_DOWN, &priv->state); - while (j--) - hns3_tqp_disable(h->kinfo.tqp[j]); - - for (j = i - 1; j >= 0; j--) - hns3_vector_disable(&priv->tqp_vector[j]); + hns3_disable_irqs_and_tqps(netdev); } return ret; @@ -823,17 +838,9 @@ static void hns3_reset_tx_queue(struct hnae3_handle *h) static void hns3_nic_net_down(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = hns3_get_handle(netdev); const struct hnae3_ae_ops *ops; - int i; - /* disable vectors */ - for (i = 0; i < priv->vector_num; i++) - hns3_vector_disable(&priv->tqp_vector[i]); - - /* disable rcb */ - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_disable(h->kinfo.tqp[i]); + hns3_disable_irqs_and_tqps(netdev); /* stop ae_dev */ ops = priv->ae_handle->ae_algo->ops; @@ -5864,8 +5871,6 @@ int hns3_set_channels(struct net_device *netdev, void hns3_external_lb_prepare(struct net_device *ndev, bool if_running) { struct hns3_nic_priv *priv = netdev_priv(ndev); - struct hnae3_handle *h = priv->ae_handle; - int i; if (!if_running) return; @@ -5876,11 +5881,7 @@ void hns3_external_lb_prepare(struct net_device *ndev, bool if_running) netif_carrier_off(ndev); netif_tx_disable(ndev); - for (i = 0; i < priv->vector_num; i++) - hns3_vector_disable(&priv->tqp_vector[i]); - - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_disable(h->kinfo.tqp[i]); + hns3_disable_irqs_and_tqps(ndev); /* delay ring buffer clearing to hns3_reset_notify_uninit_enet * during reset process, because driver may not be able @@ -5896,7 +5897,6 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running) { struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; - int i; if (!if_running) return; @@ -5912,11 +5912,7 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running) clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); - for (i = 0; i < priv->vector_num; i++) - hns3_vector_enable(&priv->tqp_vector[i]); - - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_enable(h->kinfo.tqp[i]); + hns3_enable_irqs_and_tqps(ndev); netif_tx_wake_all_queues(ndev); From e317aebeefcb3b0c71f2305af3c22871ca6b3833 Mon Sep 17 00:00:00 2001 From: Hao Lan Date: Wed, 30 Apr 2025 17:30:51 +0800 Subject: [PATCH 315/559] net: hns3: fixed debugfs tm_qset size The size of the tm_qset file of debugfs is limited to 64 KB, which is too small in the scenario with 1280 qsets. The size needs to be expanded to 1 MB. Fixes: 5e69ea7ee2a6 ("net: hns3: refactor the debugfs process") Signed-off-by: Hao Lan Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20250430093052.2400464-4-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 09749e9f7398..4e5d8bc39a1b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -61,7 +61,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .name = "tm_qset", .cmd = HNAE3_DBG_CMD_TM_QSET, .dentry = HNS3_DBG_DENTRY_TM, - .buf_len = HNS3_DBG_READ_LEN, + .buf_len = HNS3_DBG_READ_LEN_1MB, .init = hns3_dbg_common_file_init, }, { From 4971394d9d624f91689d766f31ce668d169d9959 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 30 Apr 2025 17:30:52 +0800 Subject: [PATCH 316/559] net: hns3: defer calling ptp_clock_register() Currently the ptp_clock_register() is called before relative ptp resource ready. It may cause unexpected result when upper layer called the ptp API during the timewindow. Fix it by moving the ptp_clock_register() to the function end. Fixes: 0bf5eb788512 ("net: hns3: add support for PTP") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250430093052.2400464-5-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index 59cc9221185f..ec581d4b696f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -440,6 +440,13 @@ static int hclge_ptp_create_clock(struct hclge_dev *hdev) ptp->info.settime64 = hclge_ptp_settime; ptp->info.n_alarm = 0; + + spin_lock_init(&ptp->lock); + ptp->io_base = hdev->hw.hw.io_base + HCLGE_PTP_REG_OFFSET; + ptp->ts_cfg.rx_filter = HWTSTAMP_FILTER_NONE; + ptp->ts_cfg.tx_type = HWTSTAMP_TX_OFF; + hdev->ptp = ptp; + ptp->clock = ptp_clock_register(&ptp->info, &hdev->pdev->dev); if (IS_ERR(ptp->clock)) { dev_err(&hdev->pdev->dev, @@ -451,12 +458,6 @@ static int hclge_ptp_create_clock(struct hclge_dev *hdev) return -ENODEV; } - spin_lock_init(&ptp->lock); - ptp->io_base = hdev->hw.hw.io_base + HCLGE_PTP_REG_OFFSET; - ptp->ts_cfg.rx_filter = HWTSTAMP_FILTER_NONE; - ptp->ts_cfg.tx_type = HWTSTAMP_TX_OFF; - hdev->ptp = ptp; - return 0; } From 55f362885951b2d00fd7fbb02ef0227deea572c2 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:40 +0200 Subject: [PATCH 317/559] net: vertexcom: mse102x: Fix possible stuck of SPI interrupt The MSE102x doesn't provide any SPI commands for interrupt handling. So in case the interrupt fired before the driver requests the IRQ, the interrupt will never fire again. In order to fix this always poll for pending packets after opening the interface. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-2-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/vertexcom/mse102x.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 89dc4c401a8d..92ebf1633159 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -509,6 +509,7 @@ static irqreturn_t mse102x_irq(int irq, void *_mse) static int mse102x_net_open(struct net_device *ndev) { struct mse102x_net *mse = netdev_priv(ndev); + struct mse102x_net_spi *mses = to_mse102x_spi(mse); int ret; ret = request_threaded_irq(ndev->irq, NULL, mse102x_irq, IRQF_ONESHOT, @@ -524,6 +525,13 @@ static int mse102x_net_open(struct net_device *ndev) netif_carrier_on(ndev); + /* The SPI interrupt can stuck in case of pending packet(s). + * So poll for possible packet(s) to re-arm the interrupt. + */ + mutex_lock(&mses->lock); + mse102x_rx_pkt_spi(mse); + mutex_unlock(&mses->lock); + netif_dbg(mse, ifup, ndev, "network device up\n"); return 0; From 74987089ec678b4018dba0a609e9f4bf6ef7f4ad Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:41 +0200 Subject: [PATCH 318/559] net: vertexcom: mse102x: Fix LEN_MASK The LEN_MASK for CMD_RTS doesn't cover the whole parameter mask. The Bit 11 is reserved, so adjust LEN_MASK accordingly. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-3-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/vertexcom/mse102x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 92ebf1633159..3edf2c3753f0 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -33,7 +33,7 @@ #define CMD_CTR (0x2 << CMD_SHIFT) #define CMD_MASK GENMASK(15, CMD_SHIFT) -#define LEN_MASK GENMASK(CMD_SHIFT - 1, 0) +#define LEN_MASK GENMASK(CMD_SHIFT - 2, 0) #define DET_CMD_LEN 4 #define DET_SOF_LEN 2 From d4dda902dac194e3231a1ed0f76c6c3b6340ba8a Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:42 +0200 Subject: [PATCH 319/559] net: vertexcom: mse102x: Add range check for CMD_RTS Since there is no protection in the SPI protocol against electrical interferences, the driver shouldn't blindly trust the length payload of CMD_RTS. So introduce a bounds check for incoming frames. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-4-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/vertexcom/mse102x.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 3edf2c3753f0..2c06d1d05164 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -337,8 +338,9 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) } rxlen = cmd_resp & LEN_MASK; - if (!rxlen) { - net_dbg_ratelimited("%s: No frame length defined\n", __func__); + if (rxlen < ETH_ZLEN || rxlen > VLAN_ETH_FRAME_LEN) { + net_dbg_ratelimited("%s: Invalid frame length: %d\n", __func__, + rxlen); mse->stats.invalid_len++; return; } From ee512922ddd7d64afe2b28830a88f19063217649 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:43 +0200 Subject: [PATCH 320/559] net: vertexcom: mse102x: Fix RX error handling In case the CMD_RTS got corrupted by interferences, the MSE102x doesn't allow a retransmission of the command. Instead the Ethernet frame must be shifted out of the SPI FIFO. Since the actual length is unknown, assume the maximum possible value. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-5-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/vertexcom/mse102x.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 2c06d1d05164..e4d993f31374 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -263,7 +263,7 @@ static int mse102x_tx_frame_spi(struct mse102x_net *mse, struct sk_buff *txp, } static int mse102x_rx_frame_spi(struct mse102x_net *mse, u8 *buff, - unsigned int frame_len) + unsigned int frame_len, bool drop) { struct mse102x_net_spi *mses = to_mse102x_spi(mse); struct spi_transfer *xfer = &mses->spi_xfer; @@ -281,6 +281,9 @@ static int mse102x_rx_frame_spi(struct mse102x_net *mse, u8 *buff, netdev_err(mse->ndev, "%s: spi_sync() failed: %d\n", __func__, ret); mse->stats.xfer_err++; + } else if (drop) { + netdev_dbg(mse->ndev, "%s: Drop frame\n", __func__); + ret = -EINVAL; } else if (*sof != cpu_to_be16(DET_SOF)) { netdev_dbg(mse->ndev, "%s: SPI start of frame is invalid (0x%04x)\n", __func__, *sof); @@ -308,6 +311,7 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) struct sk_buff *skb; unsigned int rxalign; unsigned int rxlen; + bool drop = false; __be16 rx = 0; u16 cmd_resp; u8 *rxpkt; @@ -330,7 +334,8 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) net_dbg_ratelimited("%s: Unexpected response (0x%04x)\n", __func__, cmd_resp); mse->stats.invalid_rts++; - return; + drop = true; + goto drop; } net_dbg_ratelimited("%s: Unexpected response to first CMD\n", @@ -342,9 +347,16 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) net_dbg_ratelimited("%s: Invalid frame length: %d\n", __func__, rxlen); mse->stats.invalid_len++; - return; + drop = true; } + /* In case of a invalid CMD_RTS, the frame must be consumed anyway. + * So assume the maximum possible frame length. + */ +drop: + if (drop) + rxlen = VLAN_ETH_FRAME_LEN; + rxalign = ALIGN(rxlen + DET_SOF_LEN + DET_DFT_LEN, 4); skb = netdev_alloc_skb_ip_align(mse->ndev, rxalign); if (!skb) @@ -355,7 +367,7 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) * They are copied, but ignored. */ rxpkt = skb_put(skb, rxlen) - DET_SOF_LEN; - if (mse102x_rx_frame_spi(mse, rxpkt, rxlen)) { + if (mse102x_rx_frame_spi(mse, rxpkt, rxlen, drop)) { mse->ndev->stats.rx_errors++; dev_kfree_skb(skb); return; From be593d9d91c5a3a363d456b9aceb71029aeb3f1d Mon Sep 17 00:00:00 2001 From: Chris Bainbridge Date: Thu, 17 Apr 2025 16:50:05 -0500 Subject: [PATCH 321/559] drm/amd/display: Fix slab-use-after-free in hdcp The HDCP code in amdgpu_dm_hdcp.c copies pointers to amdgpu_dm_connector objects without incrementing the kref reference counts. When using a USB-C dock, and the dock is unplugged, the corresponding amdgpu_dm_connector objects are freed, creating dangling pointers in the HDCP code. When the dock is plugged back, the dangling pointers are dereferenced, resulting in a slab-use-after-free: [ 66.775837] BUG: KASAN: slab-use-after-free in event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.776171] Read of size 4 at addr ffff888127804120 by task kworker/0:1/10 [ 66.776179] CPU: 0 UID: 0 PID: 10 Comm: kworker/0:1 Not tainted 6.14.0-rc7-00180-g54505f727a38-dirty #233 [ 66.776183] Hardware name: HP HP Pavilion Aero Laptop 13-be0xxx/8916, BIOS F.17 12/18/2024 [ 66.776186] Workqueue: events event_property_validate [amdgpu] [ 66.776494] Call Trace: [ 66.776496] [ 66.776497] dump_stack_lvl+0x70/0xa0 [ 66.776504] print_report+0x175/0x555 [ 66.776507] ? __virt_addr_valid+0x243/0x450 [ 66.776510] ? kasan_complete_mode_report_info+0x66/0x1c0 [ 66.776515] kasan_report+0xeb/0x1c0 [ 66.776518] ? event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.776819] ? event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.777121] __asan_report_load4_noabort+0x14/0x20 [ 66.777124] event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.777342] ? __lock_acquire+0x6b40/0x6b40 [ 66.777347] ? enable_assr+0x250/0x250 [amdgpu] [ 66.777571] process_one_work+0x86b/0x1510 [ 66.777575] ? pwq_dec_nr_in_flight+0xcf0/0xcf0 [ 66.777578] ? assign_work+0x16b/0x280 [ 66.777580] ? lock_is_held_type+0xa3/0x130 [ 66.777583] worker_thread+0x5c0/0xfa0 [ 66.777587] ? process_one_work+0x1510/0x1510 [ 66.777588] kthread+0x3a2/0x840 [ 66.777591] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777594] ? trace_hardirqs_on+0x4f/0x60 [ 66.777597] ? _raw_spin_unlock_irq+0x27/0x60 [ 66.777599] ? calculate_sigpending+0x77/0xa0 [ 66.777602] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777605] ret_from_fork+0x40/0x90 [ 66.777607] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777609] ret_from_fork_asm+0x11/0x20 [ 66.777614] [ 66.777643] Allocated by task 10: [ 66.777646] kasan_save_stack+0x39/0x60 [ 66.777649] kasan_save_track+0x14/0x40 [ 66.777652] kasan_save_alloc_info+0x37/0x50 [ 66.777655] __kasan_kmalloc+0xbb/0xc0 [ 66.777658] __kmalloc_cache_noprof+0x1c8/0x4b0 [ 66.777661] dm_dp_add_mst_connector+0xdd/0x5c0 [amdgpu] [ 66.777880] drm_dp_mst_port_add_connector+0x47e/0x770 [drm_display_helper] [ 66.777892] drm_dp_send_link_address+0x1554/0x2bf0 [drm_display_helper] [ 66.777901] drm_dp_check_and_send_link_address+0x187/0x1f0 [drm_display_helper] [ 66.777909] drm_dp_mst_link_probe_work+0x2b8/0x410 [drm_display_helper] [ 66.777917] process_one_work+0x86b/0x1510 [ 66.777919] worker_thread+0x5c0/0xfa0 [ 66.777922] kthread+0x3a2/0x840 [ 66.777925] ret_from_fork+0x40/0x90 [ 66.777927] ret_from_fork_asm+0x11/0x20 [ 66.777932] Freed by task 1713: [ 66.777935] kasan_save_stack+0x39/0x60 [ 66.777938] kasan_save_track+0x14/0x40 [ 66.777940] kasan_save_free_info+0x3b/0x60 [ 66.777944] __kasan_slab_free+0x52/0x70 [ 66.777946] kfree+0x13f/0x4b0 [ 66.777949] dm_dp_mst_connector_destroy+0xfa/0x150 [amdgpu] [ 66.778179] drm_connector_free+0x7d/0xb0 [ 66.778184] drm_mode_object_put.part.0+0xee/0x160 [ 66.778188] drm_mode_object_put+0x37/0x50 [ 66.778191] drm_atomic_state_default_clear+0x220/0xd60 [ 66.778194] __drm_atomic_state_free+0x16e/0x2a0 [ 66.778197] drm_mode_atomic_ioctl+0x15ed/0x2ba0 [ 66.778200] drm_ioctl_kernel+0x17a/0x310 [ 66.778203] drm_ioctl+0x584/0xd10 [ 66.778206] amdgpu_drm_ioctl+0xd2/0x1c0 [amdgpu] [ 66.778375] __x64_sys_ioctl+0x139/0x1a0 [ 66.778378] x64_sys_call+0xee7/0xfb0 [ 66.778381] do_syscall_64+0x87/0x140 [ 66.778385] entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fix this by properly incrementing and decrementing the reference counts when making and deleting copies of the amdgpu_dm_connector pointers. (Mario: rebase on current code and update fixes tag) Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4006 Signed-off-by: Chris Bainbridge Fixes: da3fd7ac0bcf3 ("drm/amd/display: Update CP property based on HW query") Reviewed-by: Alex Hung Link: https://lore.kernel.org/r/20250417215005.37964-1-mario.limonciello@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit d4673f3c3b3dcb74e36e53cdfc880baa7a87b330) Cc: stable@vger.kernel.org --- .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 5198a079b463..8f22ad966543 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -173,6 +173,9 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); hdcp_w->aconnector[conn_index] = aconnector; memset(&link_adjust, 0, sizeof(link_adjust)); @@ -220,7 +223,6 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); - hdcp_w->aconnector[conn_index] = aconnector; /* the removal of display will invoke auth reset -> hdcp destroy and * we'd expect the Content Protection (CP) property changed back to @@ -236,7 +238,10 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, } mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); - + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } process_output(hdcp_w); } @@ -254,6 +259,10 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { hdcp_w->encryption_status[conn_index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } } process_output(hdcp_w); @@ -488,6 +497,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) struct hdcp_workqueue *hdcp_work = handle; struct amdgpu_dm_connector *aconnector = config->dm_stream_ctx; int link_index = aconnector->dc_link->link_index; + unsigned int conn_index = aconnector->base.index; struct mod_hdcp_display *display = &hdcp_work[link_index].display; struct mod_hdcp_link *link = &hdcp_work[link_index].link; struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; @@ -544,7 +554,10 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) guard(mutex)(&hdcp_w->mutex); mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output); - + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = aconnector; process_output(hdcp_w); } From 9397204ffae887bd557e7053609174b3eb9d6f5c Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 17 Apr 2025 12:02:09 -0400 Subject: [PATCH 322/559] drm/amdgpu: Fail DMABUF map of XGMI-accessible memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If peer memory is XGMI-accessible, we should never access it through PCIe P2P DMA mappings. PCIe P2P is slower, has different coherence behaviour, limited or no support for atomics, or may not work at all. Fail with a warning if DMABUF mappings of such memory are attempted. Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit dbe4c63689bc6b5fd3ab72650ea4b6a667e96a68) --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index e6913fcf2c7b..44e120f9f764 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -199,6 +199,11 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, break; case TTM_PL_VRAM: + /* XGMI-accessible memory should never be DMA-mapped */ + if (WARN_ON(amdgpu_dmabuf_is_xgmi_accessible( + dma_buf_attach_adev(attach), bo))) + return ERR_PTR(-EINVAL); + r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0, bo->tbo.base.size, attach->dev, dir, &sgt); From 79af0604eb80ca1f86a1f265a0b1f9d4fccbc18f Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 21 Apr 2025 13:25:51 +0530 Subject: [PATCH 323/559] drm/amdgpu: Fix offset for HDP remap in nbio v7.11 APUs in passthrough mode use HDP flush. 0x7F000 offset used for remapping HDP flush is mapped to VPE space which could get power gated. Use another unused offset in BIF space. Signed-off-by: Lijo Lazar Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit d8116a32cdbe456c7f511183eb9ab187e3d590fb) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 2ece3ae75ec1..bed5ef4d8788 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -360,7 +360,7 @@ static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_BIF_LS; } -#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) +#define MMIO_REG_HOLE_OFFSET 0x44000 static void nbio_v7_11_set_reg_remap(struct amdgpu_device *adev) { From 6718b10a5b98ad6629cd6b2004b0628fe68beac0 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Wed, 23 Apr 2025 12:32:01 -0400 Subject: [PATCH 324/559] drm/amdgpu: Add DPG pause for VCN v5.0.1 For vcn5.0.1 only, enable DPG PAUSE to avoid DPG resets. Signed-off-by: Sonny Jiang Reviewed-by: Leo Liu Signed-off-by: Alex Deucher (cherry picked from commit 3e5f86c14c3440171f2a3e7a68ceb739297726e9) --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 54 +++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 581d8629b9d9..e0e84ef7f568 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -502,6 +502,52 @@ static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_vcn_inst *vinst) { } +/** + * vcn_v5_0_1_pause_dpg_mode - VCN pause with dpg mode + * + * @vinst: VCN instance + * @new_state: pause state + * + * Pause dpg mode for VCN block + */ +static int vcn_v5_0_1_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) +{ + struct amdgpu_device *adev = vinst->adev; + uint32_t reg_data = 0; + int vcn_inst; + + vcn_inst = GET_INST(VCN, vinst->inst); + + /* pause/unpause if state is changed */ + if (vinst->pause_state.fw_based != new_state->fw_based) { + DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n", + vinst->pause_state.fw_based, new_state->fw_based, + new_state->fw_based ? "VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE"); + reg_data = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + } else { + /* unpause DPG, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + } + vinst->pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + + /** * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode * @@ -518,6 +564,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE}; int vcn_inst; uint32_t tmp; @@ -582,6 +629,9 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, if (indirect) amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + /* Pause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); @@ -775,9 +825,13 @@ static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) int inst_idx = vinst->inst; uint32_t tmp; int vcn_inst; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; vcn_inst = GET_INST(VCN, inst_idx); + /* Unpause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); From 59820fde001500c167342257650541280c622b73 Mon Sep 17 00:00:00 2001 From: Wayne Chang Date: Fri, 18 Apr 2025 16:12:28 +0800 Subject: [PATCH 325/559] usb: gadget: tegra-xudc: ACK ST_RC after clearing CTRL_RUN We identified a bug where the ST_RC bit in the status register was not being acknowledged after clearing the CTRL_RUN bit in the control register. This could lead to unexpected behavior in the USB gadget drivers. This patch resolves the issue by adding the necessary code to explicitly acknowledge ST_RC after clearing CTRL_RUN based on the programming sequence, ensuring proper state transition. Fixes: 49db427232fe ("usb: gadget: Add UDC driver for tegra XUSB device mode controller") Cc: stable Signed-off-by: Wayne Chang Link: https://lore.kernel.org/r/20250418081228.1194779-1-waynec@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/tegra-xudc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index c7fdbc55fb0b..2957316fd3d0 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -1749,6 +1749,10 @@ static int __tegra_xudc_ep_disable(struct tegra_xudc_ep *ep) val = xudc_readl(xudc, CTRL); val &= ~CTRL_RUN; xudc_writel(xudc, val, CTRL); + + val = xudc_readl(xudc, ST); + if (val & ST_RC) + xudc_writel(xudc, ST_RC, ST); } dev_info(xudc->dev, "ep %u disabled\n", ep->index); From 241e2ce88e5a494be7a5d44c0697592f1632fbee Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 18 Apr 2025 04:55:16 +0000 Subject: [PATCH 326/559] usb: cdnsp: Fix issue with resuming from L1 In very rare cases after resuming controller from L1 to L0 it reads registers before the clock UTMI have been enabled and as the result driver reads incorrect value. Most of registers are in APB domain clock but some of them (e.g. PORTSC) are in UTMI domain clock. After entering to L1 state the UTMI clock can be disabled. When controller transition from L1 to L0 the port status change event is reported and in interrupt runtime function driver reads PORTSC. During this read operation controller synchronize UTMI and APB domain but UTMI clock is still disabled and in result it reads 0xFFFFFFFF value. To fix this issue driver increases APB timeout value. The issue is platform specific and if the default value of APB timeout is not sufficient then this time should be set Individually for each platform. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Cc: stable Signed-off-by: Pawel Laszczak Acked-by: Peter Chen Link: https://lore.kernel.org/r/PH7PR07MB953846C57973E4DB134CAA71DDBF2@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.c | 29 +++++++++++++++++++++++++++++ drivers/usb/cdns3/cdnsp-gadget.h | 3 +++ drivers/usb/cdns3/cdnsp-pci.c | 12 ++++++++++-- drivers/usb/cdns3/core.h | 3 +++ 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 87f310841735..7f5534db2086 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -139,6 +139,26 @@ static void cdnsp_clear_port_change_bit(struct cdnsp_device *pdev, (portsc & PORT_CHANGE_BITS), port_regs); } +static void cdnsp_set_apb_timeout_value(struct cdnsp_device *pdev) +{ + struct cdns *cdns = dev_get_drvdata(pdev->dev); + __le32 __iomem *reg; + void __iomem *base; + u32 offset = 0; + u32 val; + + if (!cdns->override_apb_timeout) + return; + + base = &pdev->cap_regs->hc_capbase; + offset = cdnsp_find_next_ext_cap(base, offset, D_XEC_PRE_REGS_CAP); + reg = base + offset + REG_CHICKEN_BITS_3_OFFSET; + + val = le32_to_cpu(readl(reg)); + val = CHICKEN_APB_TIMEOUT_SET(val, cdns->override_apb_timeout); + writel(cpu_to_le32(val), reg); +} + static void cdnsp_set_chicken_bits_2(struct cdnsp_device *pdev, u32 bit) { __le32 __iomem *reg; @@ -1798,6 +1818,15 @@ static int cdnsp_gen_setup(struct cdnsp_device *pdev) pdev->hci_version = HC_VERSION(pdev->hcc_params); pdev->hcc_params = readl(&pdev->cap_regs->hcc_params); + /* + * Override the APB timeout value to give the controller more time for + * enabling UTMI clock and synchronizing APB and UTMI clock domains. + * This fix is platform specific and is required to fixes issue with + * reading incorrect value from PORTSC register after resuming + * from L1 state. + */ + cdnsp_set_apb_timeout_value(pdev); + cdnsp_get_rev_cap(pdev); /* Make sure the Device Controller is halted. */ diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 84887dfea763..87ac0cd113e7 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -520,6 +520,9 @@ struct cdnsp_rev_cap { #define REG_CHICKEN_BITS_2_OFFSET 0x48 #define CHICKEN_XDMA_2_TP_CACHE_DIS BIT(28) +#define REG_CHICKEN_BITS_3_OFFSET 0x4C +#define CHICKEN_APB_TIMEOUT_SET(p, val) (((p) & ~GENMASK(21, 0)) | (val)) + /* XBUF Extended Capability ID. */ #define XBUF_CAP_ID 0xCB #define XBUF_RX_TAG_MASK_0_OFFSET 0x1C diff --git a/drivers/usb/cdns3/cdnsp-pci.c b/drivers/usb/cdns3/cdnsp-pci.c index a51144504ff3..8c361b8394e9 100644 --- a/drivers/usb/cdns3/cdnsp-pci.c +++ b/drivers/usb/cdns3/cdnsp-pci.c @@ -28,6 +28,8 @@ #define PCI_DRIVER_NAME "cdns-pci-usbssp" #define PLAT_DRIVER_NAME "cdns-usbssp" +#define CHICKEN_APB_TIMEOUT_VALUE 0x1C20 + static struct pci_dev *cdnsp_get_second_fun(struct pci_dev *pdev) { /* @@ -139,6 +141,14 @@ static int cdnsp_pci_probe(struct pci_dev *pdev, cdnsp->otg_irq = pdev->irq; } + /* + * Cadence PCI based platform require some longer timeout for APB + * to fixes domain clock synchronization issue after resuming + * controller from L1 state. + */ + cdnsp->override_apb_timeout = CHICKEN_APB_TIMEOUT_VALUE; + pci_set_drvdata(pdev, cdnsp); + if (pci_is_enabled(func)) { cdnsp->dev = dev; cdnsp->gadget_init = cdnsp_gadget_init; @@ -148,8 +158,6 @@ static int cdnsp_pci_probe(struct pci_dev *pdev, goto free_cdnsp; } - pci_set_drvdata(pdev, cdnsp); - device_wakeup_enable(&pdev->dev); if (pci_dev_run_wake(pdev)) pm_runtime_put_noidle(&pdev->dev); diff --git a/drivers/usb/cdns3/core.h b/drivers/usb/cdns3/core.h index 921cccf1ca9d..801be9e61340 100644 --- a/drivers/usb/cdns3/core.h +++ b/drivers/usb/cdns3/core.h @@ -79,6 +79,8 @@ struct cdns3_platform_data { * @pdata: platform data from glue layer * @lock: spinlock structure * @xhci_plat_data: xhci private data structure pointer + * @override_apb_timeout: hold value of APB timeout. For value 0 the default + * value in CHICKEN_BITS_3 will be preserved. * @gadget_init: pointer to gadget initialization function */ struct cdns { @@ -117,6 +119,7 @@ struct cdns { struct cdns3_platform_data *pdata; spinlock_t lock; struct xhci_plat_priv *xhci_plat_data; + u32 override_apb_timeout; int (*gadget_init)(struct cdns *cdns); }; From 732f35cf8bdfece582f6e4a9c659119036577308 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Tue, 22 Apr 2025 19:40:01 +0800 Subject: [PATCH 327/559] usb: host: tegra: Prevent host controller crash when OTG port is used When a USB device is connected to the OTG port, the tegra_xhci_id_work() routine transitions the PHY to host mode and calls xhci_hub_control() with the SetPortFeature command to enable port power. In certain cases, the XHCI controller may be in a low-power state when this operation occurs. If xhci_hub_control() is invoked while the controller is suspended, the PORTSC register may return 0xFFFFFFFF, indicating a read failure. This causes xhci_hc_died() to be triggered, leading to host controller shutdown. Example backtrace: [ 105.445736] Workqueue: events tegra_xhci_id_work [ 105.445747] dump_backtrace+0x0/0x1e8 [ 105.445759] xhci_hc_died.part.48+0x40/0x270 [ 105.445769] tegra_xhci_set_port_power+0xc0/0x240 [ 105.445774] tegra_xhci_id_work+0x130/0x240 To prevent this, ensure the controller is fully resumed before interacting with hardware registers by calling pm_runtime_get_sync() prior to the host mode transition and xhci_hub_control(). Fixes: f836e7843036 ("usb: xhci-tegra: Add OTG support") Cc: stable Signed-off-by: Jim Lin Signed-off-by: Wayne Chang Link: https://lore.kernel.org/r/20250422114001.126367-1-waynec@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index b5c362c2051d..0c7af44d4dae 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1364,6 +1364,7 @@ static void tegra_xhci_id_work(struct work_struct *work) tegra->otg_usb3_port = tegra_xusb_padctl_get_usb3_companion(tegra->padctl, tegra->otg_usb2_port); + pm_runtime_get_sync(tegra->dev); if (tegra->host_mode) { /* switch to host mode */ if (tegra->otg_usb3_port >= 0) { @@ -1393,6 +1394,7 @@ static void tegra_xhci_id_work(struct work_struct *work) } tegra_xhci_set_port_power(tegra, true, true); + pm_runtime_mark_last_busy(tegra->dev); } else { if (tegra->otg_usb3_port >= 0) @@ -1400,6 +1402,7 @@ static void tegra_xhci_id_work(struct work_struct *work) tegra_xhci_set_port_power(tegra, true, false); } + pm_runtime_put_autosuspend(tegra->dev); } #if IS_ENABLED(CONFIG_PM) || IS_ENABLED(CONFIG_PM_SLEEP) From 8e3820271c517ceb89ab7442656ba49fa23ee1d0 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 22 Apr 2025 16:02:29 +0530 Subject: [PATCH 328/559] usb: gadget: f_ecm: Add get_status callback When host sends GET_STATUS to ECM interface, handle the request from the function driver. Since the interface is wakeup capable, set the corresponding bit, and set RW bit if the function is already armed for wakeup by the host. Cc: stable Fixes: 481c225c4802 ("usb: gadget: Handle function suspend feature selector") Signed-off-by: Prashanth K Reviewed-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250422103231.1954387-2-prashanth.k@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ecm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c index 80841de845b0..027226325039 100644 --- a/drivers/usb/gadget/function/f_ecm.c +++ b/drivers/usb/gadget/function/f_ecm.c @@ -892,6 +892,12 @@ static void ecm_resume(struct usb_function *f) gether_resume(&ecm->port); } +static int ecm_get_status(struct usb_function *f) +{ + return (f->func_wakeup_armed ? USB_INTRF_STAT_FUNC_RW : 0) | + USB_INTRF_STAT_FUNC_RW_CAP; +} + static void ecm_free(struct usb_function *f) { struct f_ecm *ecm; @@ -960,6 +966,7 @@ static struct usb_function *ecm_alloc(struct usb_function_instance *fi) ecm->port.func.disable = ecm_disable; ecm->port.func.free_func = ecm_free; ecm->port.func.suspend = ecm_suspend; + ecm->port.func.get_status = ecm_get_status; ecm->port.func.resume = ecm_resume; return &ecm->port.func; From 5977a58dd5a4865198b0204b998adb0f634abe19 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 22 Apr 2025 16:02:30 +0530 Subject: [PATCH 329/559] usb: gadget: Use get_status callback to set remote wakeup capability Currently when the host sends GET_STATUS request for an interface, we use get_status callbacks to set/clear remote wakeup capability of that interface. And if get_status callback isn't present for that interface, then we assume its remote wakeup capability based on bmAttributes. Now consider a scenario, where we have a USB configuration with multiple interfaces (say ECM + ADB), here ECM is remote wakeup capable and as of now ADB isn't. And bmAttributes will indicate the device as wakeup capable. With the current implementation, when host sends GET_STATUS request for both interfaces, we will set FUNC_RW_CAP for both. This results in USB3 CV Chapter 9.15 (Function Remote Wakeup Test) failures as host expects remote wakeup from both interfaces. The above scenario is just an example, and the failure can be observed if we use configuration with any interface except ECM. Hence avoid configuring remote wakeup capability from composite driver based on bmAttributes, instead use get_status callbacks and let the function drivers decide this. Cc: stable Fixes: 481c225c4802 ("usb: gadget: Handle function suspend feature selector") Signed-off-by: Prashanth K Reviewed-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250422103231.1954387-3-prashanth.k@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 869ad99afb48..8dbc132a505e 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2011,15 +2011,13 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) if (f->get_status) { status = f->get_status(f); + if (status < 0) break; - } else { - /* Set D0 and D1 bits based on func wakeup capability */ - if (f->config->bmAttributes & USB_CONFIG_ATT_WAKEUP) { - status |= USB_INTRF_STAT_FUNC_RW_CAP; - if (f->func_wakeup_armed) - status |= USB_INTRF_STAT_FUNC_RW; - } + + /* if D5 is not set, then device is not wakeup capable */ + if (!(f->config->bmAttributes & USB_CONFIG_ATT_WAKEUP)) + status &= ~(USB_INTRF_STAT_FUNC_RW_CAP | USB_INTRF_STAT_FUNC_RW); } put_unaligned_le16(status & 0x0000ffff, req->buf); From 2372f1caeca433c4c01c2482f73fbe057f5168ce Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 22 Apr 2025 16:02:31 +0530 Subject: [PATCH 330/559] usb: dwc3: gadget: Make gadget_wakeup asynchronous Currently gadget_wakeup() waits for U0 synchronously if it was called from func_wakeup(), this is because we need to send the function wakeup command soon after the link is active. And the call is made synchronous by polling DSTS continuosly for 20000 times in __dwc3_gadget_wakeup(). But it observed that sometimes the link is not active even after polling 20K times, leading to remote wakeup failures. Adding a small delay between each poll helps, but that won't guarantee resolution in future. Hence make the gadget_wakeup completely asynchronous. Since multiple interfaces can issue a function wakeup at once, add a new variable wakeup_pending_funcs which will indicate the functions that has issued func_wakup, this is represented in a bitmap format. If the link is in U3, dwc3_gadget_func_wakeup() will set the bit corresponding to interface_id and bail out. Once link comes back to U0, linksts_change irq is triggered, where the function wakeup command is sent based on bitmap. Cc: stable Fixes: 92c08a84b53e ("usb: dwc3: Add function suspend and function wakeup support") Signed-off-by: Prashanth K Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250422103231.1954387-4-prashanth.k@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 4 +++ drivers/usb/dwc3/gadget.c | 60 +++++++++++++++------------------------ 2 files changed, 27 insertions(+), 37 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index aaa39e663f60..27eae4cf223d 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1164,6 +1164,9 @@ struct dwc3_scratchpad_array { * @gsbuscfg0_reqinfo: store GSBUSCFG0.DATRDREQINFO, DESRDREQINFO, * DATWRREQINFO, and DESWRREQINFO value passed from * glue driver. + * @wakeup_pending_funcs: Indicates whether any interface has requested for + * function wakeup in bitmap format where bit position + * represents interface_id. */ struct dwc3 { struct work_struct drd_work; @@ -1394,6 +1397,7 @@ struct dwc3 { int num_ep_resized; struct dentry *debug_root; u32 gsbuscfg0_reqinfo; + u32 wakeup_pending_funcs; }; #define INCRX_BURST_MODE 0 diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 8c30d86cc4e3..321361288935 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -276,8 +276,6 @@ int dwc3_send_gadget_generic_command(struct dwc3 *dwc, unsigned int cmd, return ret; } -static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async); - /** * dwc3_send_gadget_ep_cmd - issue an endpoint command * @dep: the endpoint to which the command is going to be issued @@ -2359,10 +2357,8 @@ static int dwc3_gadget_get_frame(struct usb_gadget *g) return __dwc3_gadget_get_frame(dwc); } -static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async) +static int __dwc3_gadget_wakeup(struct dwc3 *dwc) { - int retries; - int ret; u32 reg; @@ -2390,8 +2386,7 @@ static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async) return -EINVAL; } - if (async) - dwc3_gadget_enable_linksts_evts(dwc, true); + dwc3_gadget_enable_linksts_evts(dwc, true); ret = dwc3_gadget_set_link_state(dwc, DWC3_LINK_STATE_RECOV); if (ret < 0) { @@ -2410,27 +2405,8 @@ static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async) /* * Since link status change events are enabled we will receive - * an U0 event when wakeup is successful. So bail out. + * an U0 event when wakeup is successful. */ - if (async) - return 0; - - /* poll until Link State changes to ON */ - retries = 20000; - - while (retries--) { - reg = dwc3_readl(dwc->regs, DWC3_DSTS); - - /* in HS, means ON */ - if (DWC3_DSTS_USBLNKST(reg) == DWC3_LINK_STATE_U0) - break; - } - - if (DWC3_DSTS_USBLNKST(reg) != DWC3_LINK_STATE_U0) { - dev_err(dwc->dev, "failed to send remote wakeup\n"); - return -EINVAL; - } - return 0; } @@ -2451,7 +2427,7 @@ static int dwc3_gadget_wakeup(struct usb_gadget *g) spin_unlock_irqrestore(&dwc->lock, flags); return -EINVAL; } - ret = __dwc3_gadget_wakeup(dwc, true); + ret = __dwc3_gadget_wakeup(dwc); spin_unlock_irqrestore(&dwc->lock, flags); @@ -2479,14 +2455,10 @@ static int dwc3_gadget_func_wakeup(struct usb_gadget *g, int intf_id) */ link_state = dwc3_gadget_get_link_state(dwc); if (link_state == DWC3_LINK_STATE_U3) { - ret = __dwc3_gadget_wakeup(dwc, false); - if (ret) { - spin_unlock_irqrestore(&dwc->lock, flags); - return -EINVAL; - } - dwc3_resume_gadget(dwc); - dwc->suspended = false; - dwc->link_state = DWC3_LINK_STATE_U0; + dwc->wakeup_pending_funcs |= BIT(intf_id); + ret = __dwc3_gadget_wakeup(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); + return ret; } ret = dwc3_send_gadget_generic_command(dwc, DWC3_DGCMD_DEV_NOTIFICATION, @@ -4353,6 +4325,8 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc, { enum dwc3_link_state next = evtinfo & DWC3_LINK_STATE_MASK; unsigned int pwropt; + int ret; + int intf_id; /* * WORKAROUND: DWC3 < 2.50a have an issue when configured without @@ -4428,7 +4402,7 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc, switch (next) { case DWC3_LINK_STATE_U0: - if (dwc->gadget->wakeup_armed) { + if (dwc->gadget->wakeup_armed || dwc->wakeup_pending_funcs) { dwc3_gadget_enable_linksts_evts(dwc, false); dwc3_resume_gadget(dwc); dwc->suspended = false; @@ -4451,6 +4425,18 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc, } dwc->link_state = next; + + /* Proceed with func wakeup if any interfaces that has requested */ + while (dwc->wakeup_pending_funcs && (next == DWC3_LINK_STATE_U0)) { + intf_id = ffs(dwc->wakeup_pending_funcs) - 1; + ret = dwc3_send_gadget_generic_command(dwc, DWC3_DGCMD_DEV_NOTIFICATION, + DWC3_DGCMDPAR_DN_FUNC_WAKE | + DWC3_DGCMDPAR_INTF_SEL(intf_id)); + if (ret) + dev_err(dwc->dev, "Failed to send DN wake for intf %d\n", intf_id); + + dwc->wakeup_pending_funcs &= ~BIT(intf_id); + } } static void dwc3_gadget_suspend_interrupt(struct dwc3 *dwc, From a5c7973539b010874a37a0e846e62ac6f00553ba Mon Sep 17 00:00:00 2001 From: Alexey Charkov Date: Fri, 25 Apr 2025 18:11:11 +0400 Subject: [PATCH 331/559] usb: uhci-platform: Make the clock really optional Device tree bindings state that the clock is optional for UHCI platform controllers, and some existing device trees don't provide those - such as those for VIA/WonderMedia devices. The driver however fails to probe now if no clock is provided, because devm_clk_get returns an error pointer in such case. Switch to devm_clk_get_optional instead, so that it could probe again on those platforms where no clocks are given. Cc: stable Fixes: 26c502701c52 ("usb: uhci: Add clk support to uhci-platform") Signed-off-by: Alexey Charkov Link: https://lore.kernel.org/r/20250425-uhci-clock-optional-v1-1-a1d462592f29@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/uhci-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/uhci-platform.c b/drivers/usb/host/uhci-platform.c index a7c934404ebc..62318291f566 100644 --- a/drivers/usb/host/uhci-platform.c +++ b/drivers/usb/host/uhci-platform.c @@ -121,7 +121,7 @@ static int uhci_hcd_platform_probe(struct platform_device *pdev) } /* Get and enable clock if any specified */ - uhci->clk = devm_clk_get(&pdev->dev, NULL); + uhci->clk = devm_clk_get_optional(&pdev->dev, NULL); if (IS_ERR(uhci->clk)) { ret = PTR_ERR(uhci->clk); goto err_rmr; From 9f657a92805cfc98e11cf5da9e8f4e02ecff2260 Mon Sep 17 00:00:00 2001 From: Lukasz Czechowski Date: Fri, 25 Apr 2025 17:18:06 +0200 Subject: [PATCH 332/559] usb: misc: onboard_usb_dev: fix support for Cypress HX3 hubs The Cypress HX3 USB3.0 hubs use different PID values depending on the product variant. The comment in compatibles table is misleading, as the currently used PIDs (0x6504 and 0x6506 for USB 3.0 and USB 2.0, respectively) are defaults for the CYUSB331x, while CYUSB330x and CYUSB332x variants use different values. Based on the datasheet [1], update the compatible usb devices table to handle different types of the hub. The change also includes vendor mode PIDs, which are used by the hub in I2C Master boot mode, if connected EEPROM contains invalid signature or is blank. This allows to correctly boot the hub even if the EEPROM will have broken content. Number of vcc supplies and timing requirements are the same for all HX variants, so the platform driver's match table does not have to be extended. [1] https://www.infineon.com/dgdl/Infineon-HX3_USB_3_0_Hub_Consumer_Industrial-DataSheet-v22_00-EN.pdf?fileId=8ac78c8c7d0d8da4017d0ecb53f644b8 Table 9. PID Values Fixes: b43cd82a1a40 ("usb: misc: onboard-hub: add support for Cypress HX3 USB 3.0 family") Cc: stable Signed-off-by: Lukasz Czechowski Link: https://lore.kernel.org/r/20250425-onboard_usb_dev-v2-1-4a76a474a010@thaumatec.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_dev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/onboard_usb_dev.c b/drivers/usb/misc/onboard_usb_dev.c index 75ac3c6aa92d..f5372dfa241a 100644 --- a/drivers/usb/misc/onboard_usb_dev.c +++ b/drivers/usb/misc/onboard_usb_dev.c @@ -569,8 +569,14 @@ static void onboard_dev_usbdev_disconnect(struct usb_device *udev) } static const struct usb_device_id onboard_dev_id_table[] = { - { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6504) }, /* CYUSB33{0,1,2}x/CYUSB230x 3.0 HUB */ - { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6506) }, /* CYUSB33{0,1,2}x/CYUSB230x 2.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6500) }, /* CYUSB330x 3.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6502) }, /* CYUSB330x 2.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6503) }, /* CYUSB33{0,1}x 2.0 HUB, Vendor Mode */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6504) }, /* CYUSB331x 3.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6506) }, /* CYUSB331x 2.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6507) }, /* CYUSB332x 2.0 HUB, Vendor Mode */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6508) }, /* CYUSB332x 3.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x650a) }, /* CYUSB332x 2.0 HUB */ { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6570) }, /* CY7C6563x 2.0 HUB */ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 HUB */ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0610) }, /* Genesys Logic GL852G USB 2.0 HUB */ From 364618c89d4c57c85e5fc51a2446cd939bf57802 Mon Sep 17 00:00:00 2001 From: Andrei Kuchynski Date: Thu, 24 Apr 2025 08:44:28 +0000 Subject: [PATCH 333/559] usb: typec: ucsi: displayport: Fix deadlock This patch introduces the ucsi_con_mutex_lock / ucsi_con_mutex_unlock functions to the UCSI driver. ucsi_con_mutex_lock ensures the connector mutex is only locked if a connection is established and the partner pointer is valid. This resolves a deadlock scenario where ucsi_displayport_remove_partner holds con->mutex waiting for dp_altmode_work to complete while dp_altmode_work attempts to acquire it. Cc: stable Fixes: af8622f6a585 ("usb: typec: ucsi: Support for DisplayPort alt mode") Signed-off-by: Andrei Kuchynski Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250424084429.3220757-2-akuchynski@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/displayport.c | 19 +++++++++------- drivers/usb/typec/ucsi/ucsi.c | 34 ++++++++++++++++++++++++++++ drivers/usb/typec/ucsi/ucsi.h | 2 ++ 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index 420af5139c70..acd053d4e38c 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -54,7 +54,8 @@ static int ucsi_displayport_enter(struct typec_altmode *alt, u32 *vdo) u8 cur = 0; int ret; - mutex_lock(&dp->con->lock); + if (!ucsi_con_mutex_lock(dp->con)) + return -ENOTCONN; if (!dp->override && dp->initialized) { const struct typec_altmode *p = typec_altmode_get_partner(alt); @@ -100,7 +101,7 @@ static int ucsi_displayport_enter(struct typec_altmode *alt, u32 *vdo) schedule_work(&dp->work); ret = 0; err_unlock: - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); return ret; } @@ -112,7 +113,8 @@ static int ucsi_displayport_exit(struct typec_altmode *alt) u64 command; int ret = 0; - mutex_lock(&dp->con->lock); + if (!ucsi_con_mutex_lock(dp->con)) + return -ENOTCONN; if (!dp->override) { const struct typec_altmode *p = typec_altmode_get_partner(alt); @@ -144,7 +146,7 @@ static int ucsi_displayport_exit(struct typec_altmode *alt) schedule_work(&dp->work); out_unlock: - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); return ret; } @@ -202,20 +204,21 @@ static int ucsi_displayport_vdm(struct typec_altmode *alt, int cmd = PD_VDO_CMD(header); int svdm_version; - mutex_lock(&dp->con->lock); + if (!ucsi_con_mutex_lock(dp->con)) + return -ENOTCONN; if (!dp->override && dp->initialized) { const struct typec_altmode *p = typec_altmode_get_partner(alt); dev_warn(&p->dev, "firmware doesn't support alternate mode overriding\n"); - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); return -EOPNOTSUPP; } svdm_version = typec_altmode_get_svdm_version(alt); if (svdm_version < 0) { - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); return svdm_version; } @@ -259,7 +262,7 @@ static int ucsi_displayport_vdm(struct typec_altmode *alt, break; } - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); return 0; } diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index e8c7e9dc4930..01ce858a1a2b 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1922,6 +1922,40 @@ void ucsi_set_drvdata(struct ucsi *ucsi, void *data) } EXPORT_SYMBOL_GPL(ucsi_set_drvdata); +/** + * ucsi_con_mutex_lock - Acquire the connector mutex + * @con: The connector interface to lock + * + * Returns true on success, false if the connector is disconnected + */ +bool ucsi_con_mutex_lock(struct ucsi_connector *con) +{ + bool mutex_locked = false; + bool connected = true; + + while (connected && !mutex_locked) { + mutex_locked = mutex_trylock(&con->lock) != 0; + connected = UCSI_CONSTAT(con, CONNECTED); + if (connected && !mutex_locked) + msleep(20); + } + + connected = connected && con->partner; + if (!connected && mutex_locked) + mutex_unlock(&con->lock); + + return connected; +} + +/** + * ucsi_con_mutex_unlock - Release the connector mutex + * @con: The connector interface to unlock + */ +void ucsi_con_mutex_unlock(struct ucsi_connector *con) +{ + mutex_unlock(&con->lock); +} + /** * ucsi_create - Allocate UCSI instance * @dev: Device interface to the PPM (Platform Policy Manager) diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 3a2c1762bec1..9c5278a0c5d4 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -94,6 +94,8 @@ int ucsi_register(struct ucsi *ucsi); void ucsi_unregister(struct ucsi *ucsi); void *ucsi_get_drvdata(struct ucsi *ucsi); void ucsi_set_drvdata(struct ucsi *ucsi, void *data); +bool ucsi_con_mutex_lock(struct ucsi_connector *con); +void ucsi_con_mutex_unlock(struct ucsi_connector *con); void ucsi_connector_change(struct ucsi *ucsi, u8 num); From 312d79669e71283d05c05cc49a1a31e59e3d9e0e Mon Sep 17 00:00:00 2001 From: Andrei Kuchynski Date: Thu, 24 Apr 2025 08:44:29 +0000 Subject: [PATCH 334/559] usb: typec: ucsi: displayport: Fix NULL pointer access This patch ensures that the UCSI driver waits for all pending tasks in the ucsi_displayport_work workqueue to finish executing before proceeding with the partner removal. Cc: stable Fixes: af8622f6a585 ("usb: typec: ucsi: Support for DisplayPort alt mode") Signed-off-by: Andrei Kuchynski Reviewed-by: Heikki Krogerus Reviewed-by: Benson Leung Link: https://lore.kernel.org/r/20250424084429.3220757-3-akuchynski@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/displayport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index acd053d4e38c..8aae80b457d7 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -299,6 +299,8 @@ void ucsi_displayport_remove_partner(struct typec_altmode *alt) if (!dp) return; + cancel_work_sync(&dp->work); + dp->data.conf = 0; dp->data.status = 0; dp->initialized = false; From 8614ecdb1570e4fffe87ebdc62b613ed66f1f6a6 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 25 Apr 2025 05:55:40 +0000 Subject: [PATCH 335/559] usb: cdnsp: fix L1 resume issue for RTL_REVISION_NEW_LPM version The controllers with rtl version larger than RTL_REVISION_NEW_LPM (0x00002700) has bug which causes that controller doesn't resume from L1 state. It happens if after receiving LPM packet controller starts transitioning to L1 and in this moment the driver force resuming by write operation to PORTSC.PLS. It's corner case and happens when write operation to PORTSC occurs during device delay before transitioning to L1 after transmitting ACK time (TL1TokenRetry). Forcing transition from L1->L0 by driver for revision larger than RTL_REVISION_NEW_LPM is not needed, so driver can simply fix this issue through block call of cdnsp_force_l0_go function. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Cc: stable Signed-off-by: Pawel Laszczak Acked-by: Peter Chen Link: https://lore.kernel.org/r/PH7PR07MB9538B55C3A6E71F9ED29E980DD842@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.c | 2 ++ drivers/usb/cdns3/cdnsp-gadget.h | 3 +++ drivers/usb/cdns3/cdnsp-ring.c | 3 ++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 7f5534db2086..4824a10df07e 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1793,6 +1793,8 @@ static void cdnsp_get_rev_cap(struct cdnsp_device *pdev) reg += cdnsp_find_next_ext_cap(reg, 0, RTL_REV_CAP); pdev->rev_cap = reg; + pdev->rtl_revision = readl(&pdev->rev_cap->rtl_revision); + dev_info(pdev->dev, "Rev: %08x/%08x, eps: %08x, buff: %08x/%08x\n", readl(&pdev->rev_cap->ctrl_revision), readl(&pdev->rev_cap->rtl_revision), diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 87ac0cd113e7..12534be52f39 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -1360,6 +1360,7 @@ struct cdnsp_port { * @rev_cap: Controller Capabilities Registers. * @hcs_params1: Cached register copies of read-only HCSPARAMS1 * @hcc_params: Cached register copies of read-only HCCPARAMS1 + * @rtl_revision: Cached controller rtl revision. * @setup: Temporary buffer for setup packet. * @ep0_preq: Internal allocated request used during enumeration. * @ep0_stage: ep0 stage during enumeration process. @@ -1414,6 +1415,8 @@ struct cdnsp_device { __u32 hcs_params1; __u32 hcs_params3; __u32 hcc_params; + #define RTL_REVISION_NEW_LPM 0x2700 + __u32 rtl_revision; /* Lock used in interrupt thread context. */ spinlock_t lock; struct usb_ctrlrequest setup; diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 46852529499d..fd06cb85c4ea 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -308,7 +308,8 @@ static bool cdnsp_ring_ep_doorbell(struct cdnsp_device *pdev, writel(db_value, reg_addr); - cdnsp_force_l0_go(pdev); + if (pdev->rtl_revision < RTL_REVISION_NEW_LPM) + cdnsp_force_l0_go(pdev); /* Doorbell was set. */ return true; From 054c5145540e5ad5b80adf23a5e3e2fc281fb8aa Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 30 Apr 2025 15:48:10 +0200 Subject: [PATCH 336/559] USB: usbtmc: use interruptible sleep in usbtmc_read usbtmc_read() calls usbtmc_generic_read() which uses interruptible sleep, but usbtmc_read() itself uses uninterruptble sleep for mutual exclusion between threads. That makes no sense. Both should use interruptible sleep. Fixes: 5b775f672cc99 ("USB: add USB test and measurement class driver") Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20250430134810.226015-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 34e46ef308ab..634c3bcbb413 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -1380,7 +1380,10 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf, if (!buffer) return -ENOMEM; - mutex_lock(&data->io_mutex); + retval = mutex_lock_interruptible(&data->io_mutex); + if (retval < 0) + goto exit_nolock; + if (data->zombie) { retval = -ENODEV; goto exit; @@ -1503,6 +1506,7 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf, exit: mutex_unlock(&data->io_mutex); +exit_nolock: kfree(buffer); return retval; } From e918d3959b5ae0e793b8f815ce62240e10ba03a4 Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Tue, 29 Apr 2025 23:47:01 +0000 Subject: [PATCH 337/559] usb: typec: tcpm: delay SNK_TRY_WAIT_DEBOUNCE to SRC_TRYWAIT transition This patch fixes Type-C Compliance Test TD 4.7.6 - Try.SNK DRP Connect SNKAS. The compliance tester moves into SNK_UNATTACHED during toggling and expects the PUT to apply Rp after tPDDebounce of detection. If the port is in SNK_TRY_WAIT_DEBOUNCE, it will move into SRC_TRYWAIT immediately and apply Rp. This violates TD 4.7.5.V.3, where the tester confirms that the PUT attaches Rp after the transitions to Unattached.SNK for tPDDebounce. Change the tcpm_set_state delay between SNK_TRY_WAIT_DEBOUNCE and SRC_TRYWAIT to tPDDebounce. Fixes: a0a3e04e6b2c ("staging: typec: tcpm: Check for Rp for tPDDebounce") Cc: stable Signed-off-by: RD Babiera Reviewed-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250429234703.3748506-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index a99db4e025cd..8adf6f954633 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5965,7 +5965,7 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, case SNK_TRY_WAIT_DEBOUNCE: if (!tcpm_port_is_sink(port)) { port->max_wait = 0; - tcpm_set_state(port, SRC_TRYWAIT, 0); + tcpm_set_state(port, SRC_TRYWAIT, PD_T_PD_DEBOUNCE); } break; case SRC_TRY_WAIT: From 95deee37a12364f410d22c6a8383f59738a2fef3 Mon Sep 17 00:00:00 2001 From: Will McVicker Date: Thu, 24 Apr 2025 11:04:19 -0700 Subject: [PATCH 338/559] platform: Fix race condition during DMA configure at IOMMU probe time To avoid a race between the IOMMU probing thread and the device driver async probing thread during configuration of the platform DMA, update `platform_dma_configure()` to read `dev->driver` once and test if it's NULL before using it. This ensures that we don't de-reference an invalid platform driver pointer if the device driver is asynchronously bound while configuring the DMA. Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path") Signed-off-by: Will McVicker Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20250424180420.3928523-1-willmcvicker@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 1813cfd0c4bd..cfccf3ff36e7 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1440,7 +1440,7 @@ static void platform_shutdown(struct device *_dev) static int platform_dma_configure(struct device *dev) { - struct platform_driver *drv = to_platform_driver(dev->driver); + struct device_driver *drv = READ_ONCE(dev->driver); struct fwnode_handle *fwnode = dev_fwnode(dev); enum dev_dma_attr attr; int ret = 0; @@ -1451,8 +1451,8 @@ static int platform_dma_configure(struct device *dev) attr = acpi_get_dma_attr(to_acpi_device_node(fwnode)); ret = acpi_dma_configure(dev, attr); } - /* @drv may not be valid when we're called from the IOMMU layer */ - if (ret || !dev->driver || drv->driver_managed_dma) + /* @dev->driver may not be valid when we're called from the IOMMU layer */ + if (ret || !drv || to_platform_driver(drv)->driver_managed_dma) return ret; ret = iommu_device_use_default_domain(dev); From e2699274d5a43b95af1b806aa6e3b1157107665d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 30 Apr 2025 22:37:13 -0400 Subject: [PATCH 339/559] bcachefs: Fix __bch2_dev_group_set() bch2_sb_disk_groups_to_cpu() goes off of the superblock member info, so we need to set that first. Reported-by: Stijn Tintel Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_groups.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index 1186280b29e9..2ca3cbf12b71 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -470,23 +470,22 @@ inval: int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) { - struct bch_member *mi; - int ret, v = -1; + lockdep_assert_held(&c->sb_lock); - if (!strlen(name) || !strcmp(name, "none")) - return 0; - v = bch2_disk_path_find_or_create(&c->disk_sb, name); - if (v < 0) - return v; + if (!strlen(name) || !strcmp(name, "none")) { + struct bch_member *mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_GROUP(mi, 0); + } else { + int v = bch2_disk_path_find_or_create(&c->disk_sb, name); + if (v < 0) + return v; - ret = bch2_sb_disk_groups_to_cpu(c); - if (ret) - return ret; + struct bch_member *mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_GROUP(mi, v + 1); + } - mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - SET_BCH_MEMBER_GROUP(mi, v + 1); - return 0; + return bch2_sb_disk_groups_to_cpu(c); } int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) From 5a295bad38b1057dd13811242ac981bb674ab190 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Thu, 17 Apr 2025 17:07:17 -0700 Subject: [PATCH 340/559] drm/xe/eustall: Resolve a possible circular locking dependency Use a separate lock in the polling function eu_stall_data_buf_poll() instead of eu_stall->stream_lock. This would prevent a possible circular locking dependency leading to a deadlock as described below. This would also require additional locking with the new lock in the read function. <4> [787.192986] ====================================================== <4> [787.192988] WARNING: possible circular locking dependency detected <4> [787.192991] 6.14.0-rc7-xe+ #1 Tainted: G U <4> [787.192993] ------------------------------------------------------ <4> [787.192994] xe_eu_stall/20093 is trying to acquire lock: <4> [787.192996] ffff88819847e2c0 ((work_completion) (&(&stream->buf_poll_work)->work)), at: __flush_work+0x1f8/0x5e0 <4> [787.193005] but task is already holding lock: <4> [787.193007] ffff88814ce83ba8 (>->eu_stall->stream_lock){3:3}, at: xe_eu_stall_stream_ioctl+0x41/0x6a0 [xe] <4> [787.193090] which lock already depends on the new lock. <4> [787.193093] the existing dependency chain (in reverse order) is: <4> [787.193095] -> #1 (>->eu_stall->stream_lock){+.+.}-{3:3}: <4> [787.193099] __mutex_lock+0xb4/0xe40 <4> [787.193104] mutex_lock_nested+0x1b/0x30 <4> [787.193106] eu_stall_data_buf_poll_work_fn+0x44/0x1d0 [xe] <4> [787.193155] process_one_work+0x21c/0x740 <4> [787.193159] worker_thread+0x1db/0x3c0 <4> [787.193161] kthread+0x10d/0x270 <4> [787.193164] ret_from_fork+0x44/0x70 <4> [787.193168] ret_from_fork_asm+0x1a/0x30 <4> [787.193172] -> #0 ((work_completion)(&(&stream->buf_poll_work)->work)){+.+.}-{0:0}: <4> [787.193176] __lock_acquire+0x1637/0x2810 <4> [787.193180] lock_acquire+0xc9/0x300 <4> [787.193183] __flush_work+0x219/0x5e0 <4> [787.193186] cancel_delayed_work_sync+0x87/0x90 <4> [787.193189] xe_eu_stall_disable_locked+0x9a/0x260 [xe] <4> [787.193237] xe_eu_stall_stream_ioctl+0x5b/0x6a0 [xe] <4> [787.193285] __x64_sys_ioctl+0xa4/0xe0 <4> [787.193289] x64_sys_call+0x131e/0x2650 <4> [787.193292] do_syscall_64+0x91/0x180 <4> [787.193295] entry_SYSCALL_64_after_hwframe+0x76/0x7e <4> [787.193299] other info that might help us debug this: <4> [787.193302] Possible unsafe locking scenario: <4> [787.193304] CPU0 CPU1 <4> [787.193305] ---- ---- <4> [787.193306] lock(>->eu_stall->stream_lock); <4> [787.193308] lock((work_completion) (&(&stream->buf_poll_work)->work)); <4> [787.193311] lock(>->eu_stall->stream_lock); <4> [787.193313] lock((work_completion) (&(&stream->buf_poll_work)->work)); <4> [787.193315] *** DEADLOCK *** Fixes: 760edec939685 ("drm/xe/eustall: Add support to read() and poll() EU stall data") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4598 Signed-off-by: Harish Chegondi Reviewed-by: Ashutosh Dixit Signed-off-by: Ashutosh Dixit Link: https://lore.kernel.org/r/c896932fca84f79db2df5942911997ed77b2b9b6.1744934656.git.harish.chegondi@intel.com (cherry picked from commit c2b1f1b8641372bb2e563c49eb25632623a860fc) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_eu_stall.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index f2bb9168967c..78f28f3c5e5c 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -52,6 +52,8 @@ struct xe_eu_stall_data_stream { struct xe_gt *gt; struct xe_bo *bo; + /* Lock to protect data buffer pointers */ + struct mutex xecore_buf_lock; struct per_xecore_buf *xecore_buf; struct { bool reported_to_user; @@ -378,7 +380,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) u16 group, instance; unsigned int xecore; - mutex_lock(>->eu_stall->stream_lock); + mutex_lock(&stream->xecore_buf_lock); for_each_dss_steering(xecore, gt, group, instance) { xecore_buf = &stream->xecore_buf[xecore]; read_ptr = xecore_buf->read; @@ -396,7 +398,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) set_bit(xecore, stream->data_drop.mask); xecore_buf->write = write_ptr; } - mutex_unlock(>->eu_stall->stream_lock); + mutex_unlock(&stream->xecore_buf_lock); return min_data_present; } @@ -511,11 +513,13 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st unsigned int xecore; int ret = 0; + mutex_lock(&stream->xecore_buf_lock); if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) { if (!stream->data_drop.reported_to_user) { stream->data_drop.reported_to_user = true; xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n", XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask); + mutex_unlock(&stream->xecore_buf_lock); return -EIO; } stream->data_drop.reported_to_user = false; @@ -527,6 +531,7 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st if (ret || count == total_size) break; } + mutex_unlock(&stream->xecore_buf_lock); return total_size ?: (ret ?: -EAGAIN); } @@ -583,6 +588,7 @@ static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream) { struct xe_gt *gt = stream->gt; + mutex_destroy(&stream->xecore_buf_lock); gt->eu_stall->stream = NULL; kfree(stream); } @@ -718,6 +724,7 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream, } init_waitqueue_head(&stream->poll_wq); + mutex_init(&stream->xecore_buf_lock); INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn); stream->per_xecore_buf_size = per_xecore_buf_size; stream->sampling_rate_mult = props->sampling_rate_mult; From 1d622a4fe2b9a30cd4af2e858d793d05f8a82774 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Sun, 20 Apr 2025 22:59:01 -0700 Subject: [PATCH 341/559] drm/xe/eustall: Do not support EU stall on SRIOV VF EU stall sampling is not supported on SRIOV VF. Do not initialize or open EU stall stream on SRIOV VF. Fixes: 9a0b11d4cf3b ("drm/xe/eustall: Add support to init, enable and disable EU stall sampling") Signed-off-by: Harish Chegondi Reviewed-by: Ashutosh Dixit Signed-off-by: Ashutosh Dixit Link: https://lore.kernel.org/r/10db5d1c7e17aadca7078ff74575b7ffc0d5d6b8.1745215022.git.harish.chegondi@intel.com (cherry picked from commit 6ed20625a4b8189a1bd6598aa58e03147ce378ee) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_eu_stall.c | 3 +++ drivers/gpu/drm/xe/xe_eu_stall.h | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index 78f28f3c5e5c..e2bb156c71fb 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -210,6 +210,9 @@ int xe_eu_stall_init(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int ret; + if (!xe_eu_stall_supported_on_platform(xe)) + return 0; + gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL); if (!gt->eu_stall) { ret = -ENOMEM; diff --git a/drivers/gpu/drm/xe/xe_eu_stall.h b/drivers/gpu/drm/xe/xe_eu_stall.h index ed9d0f233566..d1c76e503799 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.h +++ b/drivers/gpu/drm/xe/xe_eu_stall.h @@ -7,6 +7,7 @@ #define __XE_EU_STALL_H__ #include "xe_gt_types.h" +#include "xe_sriov.h" size_t xe_eu_stall_get_per_xecore_buf_size(void); size_t xe_eu_stall_data_record_size(struct xe_device *xe); @@ -19,6 +20,6 @@ int xe_eu_stall_stream_open(struct drm_device *dev, static inline bool xe_eu_stall_supported_on_platform(struct xe_device *xe) { - return xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20; + return !IS_SRIOV_VF(xe) && (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20); } #endif From fee4d171451c1ad9e8aaf65fc0ab7d143a33bd72 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 1 May 2025 11:47:47 +0100 Subject: [PATCH 342/559] arm64: errata: Add missing sentinels to Spectre-BHB MIDR arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit a5951389e58d ("arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected() lists") added some additional CPUs to the Spectre-BHB workaround, including some new arrays for designs that require new 'k' values for the workaround to be effective. Unfortunately, the new arrays omitted the sentinel entry and so is_midr_in_range_list() will walk off the end when it doesn't find a match. With UBSAN enabled, this leads to a crash during boot when is_midr_in_range_list() is inlined (which was more common prior to c8c2647e69be ("arm64: Make  _midr_in_range_list() an exported function")): | Internal error: aarch64 BRK: 00000000f2000001 [#1] PREEMPT SMP | pstate: 804000c5 (Nzcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : spectre_bhb_loop_affected+0x28/0x30 | lr : is_spectre_bhb_affected+0x170/0x190 | [...] | Call trace: | spectre_bhb_loop_affected+0x28/0x30 | update_cpu_capabilities+0xc0/0x184 | init_cpu_features+0x188/0x1a4 | cpuinfo_store_boot_cpu+0x4c/0x60 | smp_prepare_boot_cpu+0x38/0x54 | start_kernel+0x8c/0x478 | __primary_switched+0xc8/0xd4 | Code: 6b09011f 54000061 52801080 d65f03c0 (d4200020) | ---[ end trace 0000000000000000 ]--- | Kernel panic - not syncing: aarch64 BRK: Fatal exception Add the missing sentinel entries. Cc: Lee Jones Cc: James Morse Cc: Doug Anderson Cc: Shameer Kolothum Cc: Reported-by: Greg Kroah-Hartman Fixes: a5951389e58d ("arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected() lists") Signed-off-by: Will Deacon Reviewed-by: Lee Jones Reviewed-by: Douglas Anderson Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250501104747.28431-1-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/proton-pack.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index b198dde79e59..b607f6dfc5e6 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -879,10 +879,12 @@ static u8 spectre_bhb_loop_affected(void) static const struct midr_range spectre_bhb_k132_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + {}, }; static const struct midr_range spectre_bhb_k38_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + {}, }; static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), From 28580052e634fe8fa327e6f25c35590374be754b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 1 May 2025 12:38:00 -0400 Subject: [PATCH 343/559] bcachefs: add missing sched_annotate_sleep() 00594 ------------[ cut here ]------------ 00594 do not call blocking ops when !TASK_RUNNING; state=2 set at [<000000003e51ef4a>] prepare_to_wait_event+0x5c/0x1c0 00594 WARNING: CPU: 12 PID: 1117 at kernel/sched/core.c:8741 __might_sleep+0x74/0x88 00594 Modules linked in: 00594 CPU: 12 UID: 0 PID: 1117 Comm: umount Not tainted 6.15.0-rc4-ktest-g3a72e369412d #21845 PREEMPT 00594 Hardware name: linux,dummy-virt (DT) 00594 pstate: 60001005 (nZCv daif -PAN -UAO -TCO -DIT +SSBS BTYPE=--) 00594 pc : __might_sleep+0x74/0x88 00594 lr : __might_sleep+0x74/0x88 00594 sp : ffffff80c8d67a90 00594 x29: ffffff80c8d67a90 x28: ffffff80f5903500 x27: 0000000000000000 00594 x26: 0000000000000000 x25: ffffff80cf5002a0 x24: ffffffc087dad000 00594 x23: ffffff80c8d67b40 x22: 0000000000000000 x21: 0000000000000000 00594 x20: 0000000000000242 x19: ffffffc080b92020 x18: 00000000ffffffff 00594 x17: 30303c5b20746120 x16: 74657320323d6574 x15: 617473203b474e49 00594 x14: 0000000000000001 x13: 00000000000c0000 x12: ffffff80facc0000 00594 x11: 0000000000000001 x10: 0000000000000001 x9 : ffffffc0800b0774 00594 x8 : c0000000fffbffff x7 : ffffffc087dac670 x6 : 00000000015fffa8 00594 x5 : ffffff80facbffa8 x4 : ffffff80fbd30b90 x3 : 0000000000000000 00594 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffffff80f5903500 00594 Call trace: 00594 __might_sleep+0x74/0x88 (P) 00594 __mutex_lock+0x64/0x8d8 00594 mutex_lock_nested+0x28/0x38 00594 bch2_fs_ec_flush+0xf8/0x128 00594 __bch2_fs_read_only+0x54/0x1d8 00594 bch2_fs_read_only+0x3e0/0x438 00594 __bch2_fs_stop+0x5c/0x250 00594 bch2_put_super+0x18/0x28 00594 generic_shutdown_super+0x6c/0x140 00594 bch2_kill_sb+0x1c/0x38 00594 deactivate_locked_super+0x54/0xd0 00594 deactivate_super+0x70/0x90 00594 cleanup_mnt+0xec/0x188 00594 __cleanup_mnt+0x18/0x28 00594 task_work_run+0x90/0xd8 00594 do_notify_resume+0x138/0x148 00594 el0_svc+0x9c/0xa0 00594 el0t_64_sync_handler+0x104/0x130 00594 el0t_64_sync+0x154/0x158 Signed-off-by: Kent Overstreet --- fs/bcachefs/ec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index a396865e8b17..fff58b78327c 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -2204,10 +2204,10 @@ void bch2_fs_ec_stop(struct bch_fs *c) static bool bch2_fs_ec_flush_done(struct bch_fs *c) { - bool ret; + sched_annotate_sleep(); mutex_lock(&c->ec_stripe_new_lock); - ret = list_empty(&c->ec_stripe_new_list); + bool ret = list_empty(&c->ec_stripe_new_list); mutex_unlock(&c->ec_stripe_new_lock); return ret; From f5178c41bb43444a6008150fe6094497135d07cb Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Tue, 22 Apr 2025 20:30:25 +0900 Subject: [PATCH 344/559] tracing: Fix oob write in trace_seq_to_buffer() syzbot reported this bug: ================================================================== BUG: KASAN: slab-out-of-bounds in trace_seq_to_buffer kernel/trace/trace.c:1830 [inline] BUG: KASAN: slab-out-of-bounds in tracing_splice_read_pipe+0x6be/0xdd0 kernel/trace/trace.c:6822 Write of size 4507 at addr ffff888032b6b000 by task syz.2.320/7260 CPU: 1 UID: 0 PID: 7260 Comm: syz.2.320 Not tainted 6.15.0-rc1-syzkaller-00301-g3bde70a2c827 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xc3/0x670 mm/kasan/report.c:521 kasan_report+0xe0/0x110 mm/kasan/report.c:634 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0xef/0x1a0 mm/kasan/generic.c:189 __asan_memcpy+0x3c/0x60 mm/kasan/shadow.c:106 trace_seq_to_buffer kernel/trace/trace.c:1830 [inline] tracing_splice_read_pipe+0x6be/0xdd0 kernel/trace/trace.c:6822 .... ================================================================== It has been reported that trace_seq_to_buffer() tries to copy more data than PAGE_SIZE to buf. Therefore, to prevent this, we should use the smaller of trace_seq_used(&iter->seq) and PAGE_SIZE as an argument. Link: https://lore.kernel.org/20250422113026.13308-1-aha310510@gmail.com Reported-by: syzbot+c8cd2d2c412b868263fb@syzkaller.appspotmail.com Fixes: 3c56819b14b0 ("tracing: splice support for tracing_pipe") Suggested-by: Steven Rostedt Signed-off-by: Jeongjun Park Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8ddf6b17215c..6d52dc108f00 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6821,13 +6821,14 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, /* Copy the data into the page, so we can start over. */ ret = trace_seq_to_buffer(&iter->seq, page_address(spd.pages[i]), - trace_seq_used(&iter->seq)); + min((size_t)trace_seq_used(&iter->seq), + PAGE_SIZE)); if (ret < 0) { __free_page(spd.pages[i]); break; } spd.partial[i].offset = 0; - spd.partial[i].len = trace_seq_used(&iter->seq); + spd.partial[i].len = ret; trace_seq_init(&iter->seq); } From 3c1d9cfa8458a4d6b6cd9bc3ca7bb1591130a31c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 22 Apr 2025 23:13:35 +0100 Subject: [PATCH 345/559] ftrace: Fix NULL memory allocation check The check for a failed memory location is incorrectly checking the wrong level of pointer indirection by checking !filter_hash rather than !*filter_hash. Fix this. Cc: asami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250422221335.89896-1-colin.i.king@gmail.com Fixes: 0ae6b8ce200d ("ftrace: Fix accounting of subop hashes") Signed-off-by: Colin Ian King Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 61130bb34d6c..6981830c3128 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3436,7 +3436,7 @@ static int add_next_hash(struct ftrace_hash **filter_hash, struct ftrace_hash ** /* Copy the subops hash */ *filter_hash = alloc_and_copy_ftrace_hash(size_bits, subops_hash->filter_hash); - if (!filter_hash) + if (!*filter_hash) return -ENOMEM; /* Remove any notrace functions from the copy */ remove_hash(*filter_hash, subops_hash->notrace_hash); From 1be8e54a1e0f0a4bf70e3d65f94ca1738ee4f1f3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 1 May 2025 15:19:09 -0400 Subject: [PATCH 346/559] tracing: Fix trace_adjust_address() when there is no modules in scratch area The function trace_adjust_address() is used to map addresses of modules stored in the persistent memory and are also loaded in the current boot to return the current address for the module. If there's only one module entry, it will simply use that, otherwise it performs a bsearch of the entry array to find the modules to offset with. The issue is if there are no modules in the array. The code does not account for that and ends up referencing the first element in the array which does not exist and causes a crash. If nr_entries is zero, exit out early as if this was a core kernel address. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250501151909.65910359@gandalf.local.home Fixes: 35a380ddbc653 ("tracing: Show last module text symbols in the stacktrace") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6d52dc108f00..5b8db27fb6ef 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6043,8 +6043,10 @@ unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr) tscratch = tr->scratch; /* if there is no tscrach, module_delta must be NULL. */ module_delta = READ_ONCE(tr->module_delta); - if (!module_delta || tscratch->entries[0].mod_addr > addr) + if (!module_delta || !tscratch->nr_entries || + tscratch->entries[0].mod_addr > addr) { return addr + tr->text_delta; + } /* Note that entries must be sorted. */ nr_entries = tscratch->nr_entries; From 4426e6b4ecf632bb75d973051e1179b8bfac2320 Mon Sep 17 00:00:00 2001 From: Aaron Kling Date: Wed, 23 Apr 2025 21:03:03 -0500 Subject: [PATCH 347/559] spi: tegra114: Don't fail set_cs_timing when delays are zero The original code would skip null delay pointers, but when the pointers were converted to point within the spi_device struct, the check was not updated to skip delays of zero. Hence all spi devices that didn't set delays would fail to probe. Fixes: 04e6bb0d6bb1 ("spi: modify set_cs_timing parameter") Cc: stable@vger.kernel.org Signed-off-by: Aaron Kling Link: https://patch.msgid.link/20250423-spi-tegra114-v1-1-2d608bcc12f9@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-tegra114.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 3822d7c8d8ed..2a8bb798e95b 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -728,9 +728,9 @@ static int tegra_spi_set_hw_cs_timing(struct spi_device *spi) u32 inactive_cycles; u8 cs_state; - if (setup->unit != SPI_DELAY_UNIT_SCK || - hold->unit != SPI_DELAY_UNIT_SCK || - inactive->unit != SPI_DELAY_UNIT_SCK) { + if ((setup->unit && setup->unit != SPI_DELAY_UNIT_SCK) || + (hold->unit && hold->unit != SPI_DELAY_UNIT_SCK) || + (inactive->unit && inactive->unit != SPI_DELAY_UNIT_SCK)) { dev_err(&spi->dev, "Invalid delay unit %d, should be SPI_DELAY_UNIT_SCK\n", SPI_DELAY_UNIT_SCK); From 6846100b00d97d3d6f05766ae86a0d821d849e78 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Fri, 2 May 2025 04:01:31 +0800 Subject: [PATCH 348/559] bcachefs: Remove incorrect __counted_by annotation This actually reverts 86e92eeeb237 ("bcachefs: Annotate struct bch_xattr with __counted_by()"). After the x_name, there is a value. According to the disscussion[1], __counted_by assumes that the flexible array member contains exactly the amount of elements that are specified. Now there are users came across a false positive detection of an out of bounds write caused by the __counted_by here[2], so revert that. [1] https://lore.kernel.org/lkml/Zv8VDKWN1GzLRT-_@archlinux/T/#m0ce9541c5070146320efd4f928cc1ff8de69e9b2 [2] https://privatebin.net/?a0d4e97d590d71e1#9bLmp2Kb5NU6X6cZEucchDcu88HzUQwHUah8okKPReEt Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/xattr_format.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/xattr_format.h b/fs/bcachefs/xattr_format.h index c7916011ef34..67426e33d04e 100644 --- a/fs/bcachefs/xattr_format.h +++ b/fs/bcachefs/xattr_format.h @@ -13,7 +13,13 @@ struct bch_xattr { __u8 x_type; __u8 x_name_len; __le16 x_val_len; - __u8 x_name[] __counted_by(x_name_len); + /* + * x_name contains the name and value counted by + * x_name_len + x_val_len. The introduction of + * __counted_by(x_name_len) caused a false positive + * detection of an out of bounds write. + */ + __u8 x_name[]; } __packed __aligned(8); #endif /* _BCACHEFS_XATTR_FORMAT_H */ From 53e3e5babc0963a92d856a5ec0ce92c59f54bc12 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 30 Apr 2025 11:18:28 +0900 Subject: [PATCH 349/559] ksmbd: prevent rename with empty string Client can send empty newname string to ksmbd server. It will cause a kernel oops from d_alloc. This patch return the error when attempting to rename a file or directory with an empty new name string. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 46aa08245742..f2a2be8467c6 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -633,6 +633,11 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls) return name; } + if (*name == '\0') { + kfree(name); + return ERR_PTR(-EINVAL); + } + if (*name == '\\') { pr_err("not allow directory name included leading slash\n"); kfree(name); From eb4447bcce915b43b691123118893fca4f372a8f Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Wed, 30 Apr 2025 11:16:23 +0800 Subject: [PATCH 350/559] ksmbd: fix memory leak in parse_lease_state() The previous patch that added bounds check for create lease context introduced a memory leak. When the bounds check fails, the function returns NULL without freeing the previously allocated lease_ctx_info structure. This patch fixes the issue by adding kfree(lreq) before returning NULL in both boundary check cases. Fixes: bab703ed8472 ("ksmbd: add bounds check for create lease context") Signed-off-by: Wang Zhaolong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 81a29857b1e3..03f606afad93 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1496,7 +1496,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < sizeof(struct create_lease_v2) - 4) - return NULL; + goto err_out; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; @@ -1512,7 +1512,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < sizeof(struct create_lease)) - return NULL; + goto err_out; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; @@ -1521,6 +1521,9 @@ struct lease_ctx_info *parse_lease_state(void *open_req) lreq->version = 1; } return lreq; +err_out: + kfree(lreq); + return NULL; } /** From 0a8f11f8569e7ed16cbcedeb28c4350f6378fea6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 1 May 2025 22:41:28 -0400 Subject: [PATCH 351/559] tracing: Do not take trace_event_sem in print_event_fields() On some paths in print_event_fields() it takes the trace_event_sem for read, even though it should always be held when the function is called. Remove the taking of that mutex and add a lockdep_assert_held_read() to make sure the trace_event_sem is held when print_event_fields() is called. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250501224128.0b1f0571@batman.local.home Fixes: 80a76994b2d88 ("tracing: Add "fields" option to show raw trace event fields") Reported-by: syzbot+441582c1592938fccf09@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/6813ff5e.050a0220.14dd7d.001b.GAE@google.com/ Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index fee40ffbd490..b9ab06c99543 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1042,11 +1042,12 @@ enum print_line_t print_event_fields(struct trace_iterator *iter, struct trace_event_call *call; struct list_head *head; + lockdep_assert_held_read(&trace_event_sem); + /* ftrace defined events have separate call structures */ if (event->type <= __TRACE_LAST_TYPE) { bool found = false; - down_read(&trace_event_sem); list_for_each_entry(call, &ftrace_events, list) { if (call->event.type == event->type) { found = true; @@ -1056,7 +1057,6 @@ enum print_line_t print_event_fields(struct trace_iterator *iter, if (call->event.type > __TRACE_LAST_TYPE) break; } - up_read(&trace_event_sem); if (!found) { trace_seq_printf(&iter->seq, "UNKNOWN TYPE %d\n", event->type); goto out; From 3393c90daf4e1704c6a5c3833439f461663a2e1d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 21 Apr 2025 08:15:38 -0700 Subject: [PATCH 352/559] drm/xe/hwmon: Fix kernel version documentation for temperature The version in the sysfs attribute should correspond to the version in which this is enabled and visible for end users. It usually doesn't correspond to the version in which the patch was developed, but rather a release that will contain it. Update them to 6.15. Fixes: dac328dea701 ("drm/xe/hwmon: expose package and vram temperature") Reported-by: Ulisses Furquim Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4840 Reviewed-by: Rodrigo Vivi Reviewed-by: Raag Jadav Link: https://lore.kernel.org/r/20250421-hwmon-doc-fix-v1-1-9f68db702249@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 8500393a8e6c58e5e7c135133ad792fc6fd5b6f4) Signed-off-by: Lucas De Marchi --- Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index 9bce281314df..cb207c79680d 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -111,7 +111,7 @@ Description: RO. Package current voltage in millivolt. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/temp2_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.15 Contact: intel-xe@lists.freedesktop.org Description: RO. Package temperature in millidegree Celsius. @@ -119,7 +119,7 @@ Description: RO. Package temperature in millidegree Celsius. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/temp3_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.15 Contact: intel-xe@lists.freedesktop.org Description: RO. VRAM temperature in millidegree Celsius. From e8e3a804f3845a147fbdf73f910c12ddb3a2a86f Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Sun, 27 Apr 2025 19:47:52 -0700 Subject: [PATCH 353/559] drm/gpusvm: set has_dma_mapping inside mapping loop The 'has_dma_mapping' flag should be set once there is a mapping so it could be unmapped in case of error. v2: - Resend for CI Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Signed-off-by: Dafna Hirschfeld Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250428024752.881292-1-matthew.brost@intel.com (cherry picked from commit f64cf7b681af72d3f715c0d0fd72091a54471c1a) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/drm_gpusvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 38431e8360e7..de424e670995 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1469,9 +1469,9 @@ map_pages: } i += 1 << order; num_dma_mapped = i; + range->flags.has_dma_mapping = true; } - range->flags.has_dma_mapping = true; if (zdd) { range->flags.has_devmem_pages = true; range->dpagemap = dpagemap; From cac01bd178d6a2a23727f138d647ce1a0e8a73a1 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 2 May 2025 09:09:39 +0200 Subject: [PATCH 354/559] usb: usbtmc: Fix erroneous get_stb ioctl error returns wait_event_interruptible_timeout returns a long The return was being assigned to an int causing an integer overflow when the remaining jiffies > INT_MAX resulting in random error returns. Use a long return value and convert to int ioctl return only on error. When the return value of wait_event_interruptible_timeout was <= INT_MAX the number of remaining jiffies was returned which has no meaning for the user. Return 0 on success. Reported-by: Michael Katzmann Fixes: dbf3e7f654c0 ("Implement an ioctl to support the USMTMC-USB488 READ_STATUS_BYTE operation.") Cc: stable@vger.kernel.org Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250502070941.31819-2-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 634c3bcbb413..e91f11c1ab5f 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -482,6 +482,7 @@ static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) u8 *buffer; u8 tag; int rv; + long wait_rv; dev_dbg(dev, "Enter ioctl_read_stb iin_ep_present: %d\n", data->iin_ep_present); @@ -511,16 +512,17 @@ static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) } if (data->iin_ep_present) { - rv = wait_event_interruptible_timeout( + wait_rv = wait_event_interruptible_timeout( data->waitq, atomic_read(&data->iin_data_valid) != 0, file_data->timeout); - if (rv < 0) { - dev_dbg(dev, "wait interrupted %d\n", rv); + if (wait_rv < 0) { + dev_dbg(dev, "wait interrupted %ld\n", wait_rv); + rv = wait_rv; goto exit; } - if (rv == 0) { + if (wait_rv == 0) { dev_dbg(dev, "wait timed out\n"); rv = -ETIMEDOUT; goto exit; @@ -539,6 +541,8 @@ static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) dev_dbg(dev, "stb:0x%02x received %d\n", (unsigned int)*stb, rv); + rv = 0; + exit: /* bump interrupt bTag */ data->iin_bTag += 1; From a9747c9b8b59ab4207effd20eb91a890acb44e16 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 2 May 2025 09:09:40 +0200 Subject: [PATCH 355/559] usb: usbtmc: Fix erroneous wait_srq ioctl return wait_event_interruptible_timeout returns a long The return was being assigned to an int causing an integer overflow when the remaining jiffies > INT_MAX resulting in random error returns. Use a long return value, converting to the int ioctl return only on error. Fixes: 739240a9f6ac ("usb: usbtmc: Add ioctl USBTMC488_IOCTL_WAIT_SRQ") Cc: stable@vger.kernel.org Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250502070941.31819-3-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index e91f11c1ab5f..4761a5ecadd6 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -606,9 +606,9 @@ static int usbtmc488_ioctl_wait_srq(struct usbtmc_file_data *file_data, { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; - int rv; u32 timeout; unsigned long expire; + long wait_rv; if (!data->iin_ep_present) { dev_dbg(dev, "no interrupt endpoint present\n"); @@ -622,25 +622,24 @@ static int usbtmc488_ioctl_wait_srq(struct usbtmc_file_data *file_data, mutex_unlock(&data->io_mutex); - rv = wait_event_interruptible_timeout( - data->waitq, - atomic_read(&file_data->srq_asserted) != 0 || - atomic_read(&file_data->closing), - expire); + wait_rv = wait_event_interruptible_timeout( + data->waitq, + atomic_read(&file_data->srq_asserted) != 0 || + atomic_read(&file_data->closing), + expire); mutex_lock(&data->io_mutex); /* Note! disconnect or close could be called in the meantime */ if (atomic_read(&file_data->closing) || data->zombie) - rv = -ENODEV; + return -ENODEV; - if (rv < 0) { - /* dev can be invalid now! */ - pr_debug("%s - wait interrupted %d\n", __func__, rv); - return rv; + if (wait_rv < 0) { + dev_dbg(dev, "%s - wait interrupted %ld\n", __func__, wait_rv); + return wait_rv; } - if (rv == 0) { + if (wait_rv == 0) { dev_dbg(dev, "%s - wait timed out\n", __func__); return -ETIMEDOUT; } From 4e77d3ec7c7c0d9535ccf1138827cb9bb5480b9b Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 2 May 2025 09:09:41 +0200 Subject: [PATCH 356/559] usb: usbtmc: Fix erroneous generic_read ioctl return wait_event_interruptible_timeout returns a long The return value was being assigned to an int causing an integer overflow when the remaining jiffies > INT_MAX which resulted in random error returns. Use a long return value, converting to the int ioctl return only on error. Fixes: bb99794a4792 ("usb: usbtmc: Add ioctl for vendor specific read") Cc: stable@vger.kernel.org Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250502070941.31819-4-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 4761a5ecadd6..740d2d2b19fb 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -833,6 +833,7 @@ static ssize_t usbtmc_generic_read(struct usbtmc_file_data *file_data, unsigned long expire; int bufcount = 1; int again = 0; + long wait_rv; /* mutex already locked */ @@ -945,19 +946,24 @@ static ssize_t usbtmc_generic_read(struct usbtmc_file_data *file_data, if (!(flags & USBTMC_FLAG_ASYNC)) { dev_dbg(dev, "%s: before wait time %lu\n", __func__, expire); - retval = wait_event_interruptible_timeout( + wait_rv = wait_event_interruptible_timeout( file_data->wait_bulk_in, usbtmc_do_transfer(file_data), expire); - dev_dbg(dev, "%s: wait returned %d\n", - __func__, retval); + dev_dbg(dev, "%s: wait returned %ld\n", + __func__, wait_rv); - if (retval <= 0) { - if (retval == 0) - retval = -ETIMEDOUT; + if (wait_rv < 0) { + retval = wait_rv; goto error; } + + if (wait_rv == 0) { + retval = -ETIMEDOUT; + goto error; + } + } urb = usb_get_from_anchor(&file_data->in_anchor); From 6f9a8ab796c6528d22de3c504c81fce7dde63d8a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Apr 2025 23:23:29 -0700 Subject: [PATCH 357/559] btrfs: compression: adjust cb->compressed_folios allocation type In preparation for making the kmalloc() family of allocators type aware, we need to make sure that the returned type from the allocation matches the type of the variable being assigned. (Before, the allocator would always return "void *", which can be implicitly cast to any pointer type.) The assigned type is "struct folio **" but the returned type will be "struct page **". These are the same allocation size (pointer size), but the types don't match. Adjust the allocation type to match the assignment. Reviewed-by: Qu Wenruo Signed-off-by: Kees Cook Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index e7f8ee5d48a4..7f11ef559be6 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -606,7 +606,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio) free_extent_map(em); cb->nr_folios = DIV_ROUND_UP(compressed_len, PAGE_SIZE); - cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct page *), GFP_NOFS); + cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct folio *), GFP_NOFS); if (!cb->compressed_folios) { ret = BLK_STS_RESOURCE; goto out_free_bio; From bc7e0975093567f51be8e1bdf4aa5900a3cf0b1e Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Fri, 25 Apr 2025 09:25:06 -0400 Subject: [PATCH 358/559] btrfs: correct the order of prelim_ref arguments in btrfs__prelim_ref btrfs_prelim_ref() calls the old and new reference variables in the incorrect order. This causes a NULL pointer dereference because oldref is passed as NULL to trace_btrfs_prelim_ref_insert(). Note, trace_btrfs_prelim_ref_insert() is being called with newref as oldref (and oldref as NULL) on purpose in order to print out the values of newref. To reproduce: echo 1 > /sys/kernel/debug/tracing/events/btrfs/btrfs_prelim_ref_insert/enable Perform some writeback operations. Backtrace: BUG: kernel NULL pointer dereference, address: 0000000000000018 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 115949067 P4D 115949067 PUD 11594a067 PMD 0 Oops: Oops: 0000 [#1] SMP NOPTI CPU: 1 UID: 0 PID: 1188 Comm: fsstress Not tainted 6.15.0-rc2-tester+ #47 PREEMPT(voluntary) 7ca2cef72d5e9c600f0c7718adb6462de8149622 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014 RIP: 0010:trace_event_raw_event_btrfs__prelim_ref+0x72/0x130 Code: e8 43 81 9f ff 48 85 c0 74 78 4d 85 e4 0f 84 8f 00 00 00 49 8b 94 24 c0 06 00 00 48 8b 0a 48 89 48 08 48 8b 52 08 48 89 50 10 <49> 8b 55 18 48 89 50 18 49 8b 55 20 48 89 50 20 41 0f b6 55 28 88 RSP: 0018:ffffce44820077a0 EFLAGS: 00010286 RAX: ffff8c6b403f9014 RBX: ffff8c6b55825730 RCX: 304994edf9cf506b RDX: d8b11eb7f0fdb699 RSI: ffff8c6b403f9010 RDI: ffff8c6b403f9010 RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000010 R10: 00000000ffffffff R11: 0000000000000000 R12: ffff8c6b4e8fb000 R13: 0000000000000000 R14: ffffce44820077a8 R15: ffff8c6b4abd1540 FS: 00007f4dc6813740(0000) GS:ffff8c6c1d378000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 000000010eb42000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: prelim_ref_insert+0x1c1/0x270 find_parent_nodes+0x12a6/0x1ee0 ? __entry_text_end+0x101f06/0x101f09 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 btrfs_is_data_extent_shared+0x167/0x640 ? fiemap_process_hole+0xd0/0x2c0 extent_fiemap+0xa5c/0xbc0 ? __entry_text_end+0x101f05/0x101f09 btrfs_fiemap+0x7e/0xd0 do_vfs_ioctl+0x425/0x9d0 __x64_sys_ioctl+0x75/0xc0 Signed-off-by: Goldwyn Rodrigues Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 549ab3b41961..3efc00cc1bcd 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1928,7 +1928,7 @@ DECLARE_EVENT_CLASS(btrfs__prelim_ref, TP_PROTO(const struct btrfs_fs_info *fs_info, const struct prelim_ref *oldref, const struct prelim_ref *newref, u64 tree_size), - TP_ARGS(fs_info, newref, oldref, tree_size), + TP_ARGS(fs_info, oldref, newref, tree_size), TP_STRUCT__entry_btrfs( __field( u64, root_id ) From d6fe0c69b3aa5c985380b794bdf8e6e9b1811e60 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 25 Apr 2025 12:47:50 -0700 Subject: [PATCH 359/559] btrfs: handle empty eb->folios in num_extent_folios() num_extent_folios() unconditionally calls folio_order() on eb->folios[0]. If that is NULL this will be a segfault. It is reasonable for it to return 0 as the number of folios in the eb when the first entry is NULL, so do that instead. Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2e261892c7bc..f5b28b5c4908 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -298,6 +298,8 @@ static inline int __pure num_extent_pages(const struct extent_buffer *eb) */ static inline int __pure num_extent_folios(const struct extent_buffer *eb) { + if (!eb->folios[0]) + return 0; if (folio_order(eb->folios[0])) return 1; return num_extent_pages(eb); From f95d186255b319c48a365d47b69bd997fecb674e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 29 Apr 2025 15:17:50 +0930 Subject: [PATCH 360/559] btrfs: avoid NULL pointer dereference if no valid csum tree [BUG] When trying read-only scrub on a btrfs with rescue=idatacsums mount option, it will crash with the following call trace: BUG: kernel NULL pointer dereference, address: 0000000000000208 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page CPU: 1 UID: 0 PID: 835 Comm: btrfs Tainted: G O 6.15.0-rc3-custom+ #236 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown 02/02/2022 RIP: 0010:btrfs_lookup_csums_bitmap+0x49/0x480 [btrfs] Call Trace: scrub_find_fill_first_stripe+0x35b/0x3d0 [btrfs] scrub_simple_mirror+0x175/0x290 [btrfs] scrub_stripe+0x5f7/0x6f0 [btrfs] scrub_chunk+0x9a/0x150 [btrfs] scrub_enumerate_chunks+0x333/0x660 [btrfs] btrfs_scrub_dev+0x23e/0x600 [btrfs] btrfs_ioctl+0x1dcf/0x2f80 [btrfs] __x64_sys_ioctl+0x97/0xc0 do_syscall_64+0x4f/0x120 entry_SYSCALL_64_after_hwframe+0x76/0x7e [CAUSE] Mount option "rescue=idatacsums" will completely skip loading the csum tree, so that any data read will not find any data csum thus we will ignore data checksum verification. Normally call sites utilizing csum tree will check the fs state flag NO_DATA_CSUMS bit, but unfortunately scrub does not check that bit at all. This results in scrub to call btrfs_search_slot() on a NULL pointer and triggered above crash. [FIX] Check both extent and csum tree root before doing any tree search. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2c5edcee9450..c3b2e29e3e01 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1541,8 +1541,8 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, u64 extent_gen; int ret; - if (unlikely(!extent_root)) { - btrfs_err(fs_info, "no valid extent root for scrub"); + if (unlikely(!extent_root || !csum_root)) { + btrfs_err(fs_info, "no valid extent or csum root for scrub"); return -EUCLEAN; } memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) * From 8fb1dcbbcc1ffe6ed7cf3f0f96d2737491dd1fbf Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 17 Jan 2025 09:09:34 +1030 Subject: [PATCH 361/559] Revert "btrfs: canonicalize the device path before adding it" This reverts commit 7e06de7c83a746e58d4701e013182af133395188. Commit 7e06de7c83a7 ("btrfs: canonicalize the device path before adding it") tries to make btrfs to use "/dev/mapper/*" name first, then any filename inside "/dev/" as the device path. This is mostly fine when there is only the root namespace involved, but when multiple namespace are involved, things can easily go wrong for the d_path() usage. As d_path() returns a file path that is namespace dependent, the resulted string may not make any sense in another namespace. Furthermore, the "/dev/" prefix checks itself is not reliable, one can still make a valid initramfs without devtmpfs, and fill all needed device nodes manually. Overall the userspace has all its might to pass whatever device path for mount, and we are not going to win the war trying to cover every corner case. So just revert that commit, and do no extra d_path() based file path sanity check. CC: stable@vger.kernel.org # 6.12+ Link: https://lore.kernel.org/linux-fsdevel/20250115185608.GA2223535@zen.localdomain/ Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 91 +--------------------------------------------- 1 file changed, 1 insertion(+), 90 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c8c21c55be53..8e6b6fed7429 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -733,82 +733,6 @@ const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb) return has_metadata_uuid ? sb->metadata_uuid : sb->fsid; } -/* - * We can have very weird soft links passed in. - * One example is "/proc/self/fd/", which can be a soft link to - * a block device. - * - * But it's never a good idea to use those weird names. - * Here we check if the path (not following symlinks) is a good one inside - * "/dev/". - */ -static bool is_good_dev_path(const char *dev_path) -{ - struct path path = { .mnt = NULL, .dentry = NULL }; - char *path_buf = NULL; - char *resolved_path; - bool is_good = false; - int ret; - - if (!dev_path) - goto out; - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!path_buf) - goto out; - - /* - * Do not follow soft link, just check if the original path is inside - * "/dev/". - */ - ret = kern_path(dev_path, 0, &path); - if (ret) - goto out; - resolved_path = d_path(&path, path_buf, PATH_MAX); - if (IS_ERR(resolved_path)) - goto out; - if (strncmp(resolved_path, "/dev/", strlen("/dev/"))) - goto out; - is_good = true; -out: - kfree(path_buf); - path_put(&path); - return is_good; -} - -static int get_canonical_dev_path(const char *dev_path, char *canonical) -{ - struct path path = { .mnt = NULL, .dentry = NULL }; - char *path_buf = NULL; - char *resolved_path; - int ret; - - if (!dev_path) { - ret = -EINVAL; - goto out; - } - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!path_buf) { - ret = -ENOMEM; - goto out; - } - - ret = kern_path(dev_path, LOOKUP_FOLLOW, &path); - if (ret) - goto out; - resolved_path = d_path(&path, path_buf, PATH_MAX); - if (IS_ERR(resolved_path)) { - ret = PTR_ERR(resolved_path); - goto out; - } - ret = strscpy(canonical, resolved_path, PATH_MAX); -out: - kfree(path_buf); - path_put(&path); - return ret; -} - static bool is_same_device(struct btrfs_device *device, const char *new_path) { struct path old = { .mnt = NULL, .dentry = NULL }; @@ -1513,23 +1437,12 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, bool new_device_added = false; struct btrfs_device *device = NULL; struct file *bdev_file; - char *canonical_path = NULL; u64 bytenr; dev_t devt; int ret; lockdep_assert_held(&uuid_mutex); - if (!is_good_dev_path(path)) { - canonical_path = kmalloc(PATH_MAX, GFP_KERNEL); - if (canonical_path) { - ret = get_canonical_dev_path(path, canonical_path); - if (ret < 0) { - kfree(canonical_path); - canonical_path = NULL; - } - } - } /* * Avoid an exclusive open here, as the systemd-udev may initiate the * device scan which may race with the user's mount or mkfs command, @@ -1574,8 +1487,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, goto free_disk_super; } - device = device_list_add(canonical_path ? : path, disk_super, - &new_device_added); + device = device_list_add(path, disk_super, &new_device_added); if (!IS_ERR(device) && new_device_added) btrfs_free_stale_devices(device->devt, device); @@ -1584,7 +1496,6 @@ free_disk_super: error_bdev_put: fput(bdev_file); - kfree(canonical_path); return device; } From 38e541051e1d19e8b1479a6af587a7884653e041 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Thu, 1 May 2025 02:10:48 +0800 Subject: [PATCH 362/559] btrfs: open code folio_index() in btree_clear_folio_dirty_tag() The folio_index() helper is only needed for mixed usage of page cache and swap cache, for pure page cache usage, the caller can just use folio->index instead. It can't be a swap cache folio here. Swap mapping may only call into fs through 'swap_rw' but btrfs does not use that method for swap. Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Qu Wenruo Signed-off-by: Kairui Song Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8515c31f563b..13bdd60da3c7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3508,8 +3508,8 @@ static void btree_clear_folio_dirty_tag(struct folio *folio) ASSERT(folio_test_locked(folio)); xa_lock_irq(&folio->mapping->i_pages); if (!folio_test_dirty(folio)) - __xa_clear_mark(&folio->mapping->i_pages, - folio_index(folio), PAGECACHE_TAG_DIRTY); + __xa_clear_mark(&folio->mapping->i_pages, folio->index, + PAGECACHE_TAG_DIRTY); xa_unlock_irq(&folio->mapping->i_pages); } From f31fe8165d365379d858c53bef43254c7d6d1cfd Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Fri, 2 May 2025 13:18:10 +0530 Subject: [PATCH 363/559] uio_hv_generic: Fix sysfs creation path for ring buffer On regular bootup, devices get registered to VMBus first, so when uio_hv_generic driver for a particular device type is probed, the device is already initialized and added, so sysfs creation in hv_uio_probe() works fine. However, when the device is removed and brought back, the channel gets rescinded and the device again gets registered to VMBus. However this time, the uio_hv_generic driver is already registered to probe for that device and in this case sysfs creation is tried before the device's kobject gets initialized completely. Fix this by moving the core logic of sysfs creation of ring buffer, from uio_hv_generic to HyperV's VMBus driver, where the rest of the sysfs attributes for the channels are defined. While doing that, make use of attribute groups and macros, instead of creating sysfs directly, to ensure better error handling and code flow. Problematic path: vmbus_process_offer (A new offer comes for the VMBus device) vmbus_add_channel_work vmbus_device_register |-> device_register | |... | |-> hv_uio_probe | |... | |-> sysfs_create_bin_file (leads to a warning as | the primary channel's kobject, which is used to | create the sysfs file, is not yet initialized) |-> kset_create_and_add |-> vmbus_add_channel_kobj (initialization of the primary channel's kobject happens later) Above code flow is sequential and the warning is always reproducible in this path. Fixes: 9ab877a6ccf8 ("uio_hv_generic: make ring buffer attribute for primary channel") Cc: stable@kernel.org Suggested-by: Saurabh Sengar Suggested-by: Michael Kelley Reviewed-by: Michael Kelley Tested-by: Michael Kelley Reviewed-by: Dexuan Cui Signed-off-by: Naman Jain Link: https://lore.kernel.org/r/20250502074811.2022-2-namjain@linux.microsoft.com Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hyperv_vmbus.h | 6 +++ drivers/hv/vmbus_drv.c | 100 ++++++++++++++++++++++++++++++++++- drivers/uio/uio_hv_generic.c | 39 ++++++-------- include/linux/hyperv.h | 6 +++ 4 files changed, 128 insertions(+), 23 deletions(-) diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 29780f3a7478..0b450e53161e 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -477,4 +477,10 @@ static inline int hv_debug_add_dev_dir(struct hv_device *dev) #endif /* CONFIG_HYPERV_TESTING */ +/* Create and remove sysfs entry for memory mapped ring buffers for a channel */ +int hv_create_ring_sysfs(struct vmbus_channel *channel, + int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel, + struct vm_area_struct *vma)); +int hv_remove_ring_sysfs(struct vmbus_channel *channel); + #endif /* _HYPERV_VMBUS_H */ diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 8d3cff42bdbb..0f16a83cc2d6 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1802,6 +1802,27 @@ static ssize_t subchannel_id_show(struct vmbus_channel *channel, } static VMBUS_CHAN_ATTR_RO(subchannel_id); +static int hv_mmap_ring_buffer_wrapper(struct file *filp, struct kobject *kobj, + const struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + struct vmbus_channel *channel = container_of(kobj, struct vmbus_channel, kobj); + + /* + * hv_(create|remove)_ring_sysfs implementation ensures that mmap_ring_buffer + * is not NULL. + */ + return channel->mmap_ring_buffer(channel, vma); +} + +static struct bin_attribute chan_attr_ring_buffer = { + .attr = { + .name = "ring", + .mode = 0600, + }, + .size = 2 * SZ_2M, + .mmap = hv_mmap_ring_buffer_wrapper, +}; static struct attribute *vmbus_chan_attrs[] = { &chan_attr_out_mask.attr, &chan_attr_in_mask.attr, @@ -1821,6 +1842,11 @@ static struct attribute *vmbus_chan_attrs[] = { NULL }; +static struct bin_attribute *vmbus_chan_bin_attrs[] = { + &chan_attr_ring_buffer, + NULL +}; + /* * Channel-level attribute_group callback function. Returns the permission for * each attribute, and returns 0 if an attribute is not visible. @@ -1841,9 +1867,24 @@ static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj, return attr->mode; } +static umode_t vmbus_chan_bin_attr_is_visible(struct kobject *kobj, + const struct bin_attribute *attr, int idx) +{ + const struct vmbus_channel *channel = + container_of(kobj, struct vmbus_channel, kobj); + + /* Hide ring attribute if channel's ring_sysfs_visible is set to false */ + if (attr == &chan_attr_ring_buffer && !channel->ring_sysfs_visible) + return 0; + + return attr->attr.mode; +} + static const struct attribute_group vmbus_chan_group = { .attrs = vmbus_chan_attrs, - .is_visible = vmbus_chan_attr_is_visible + .bin_attrs = vmbus_chan_bin_attrs, + .is_visible = vmbus_chan_attr_is_visible, + .is_bin_visible = vmbus_chan_bin_attr_is_visible, }; static const struct kobj_type vmbus_chan_ktype = { @@ -1851,6 +1892,63 @@ static const struct kobj_type vmbus_chan_ktype = { .release = vmbus_chan_release, }; +/** + * hv_create_ring_sysfs() - create "ring" sysfs entry corresponding to ring buffers for a channel. + * @channel: Pointer to vmbus_channel structure + * @hv_mmap_ring_buffer: function pointer for initializing the function to be called on mmap of + * channel's "ring" sysfs node, which is for the ring buffer of that channel. + * Function pointer is of below type: + * int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel, + * struct vm_area_struct *vma)) + * This has a pointer to the channel and a pointer to vm_area_struct, + * used for mmap, as arguments. + * + * Sysfs node for ring buffer of a channel is created along with other fields, however its + * visibility is disabled by default. Sysfs creation needs to be controlled when the use-case + * is running. + * For example, HV_NIC device is used either by uio_hv_generic or hv_netvsc at any given point of + * time, and "ring" sysfs is needed only when uio_hv_generic is bound to that device. To avoid + * exposing the ring buffer by default, this function is reponsible to enable visibility of + * ring for userspace to use. + * Note: Race conditions can happen with userspace and it is not encouraged to create new + * use-cases for this. This was added to maintain backward compatibility, while solving + * one of the race conditions in uio_hv_generic while creating sysfs. + * + * Returns 0 on success or error code on failure. + */ +int hv_create_ring_sysfs(struct vmbus_channel *channel, + int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel, + struct vm_area_struct *vma)) +{ + struct kobject *kobj = &channel->kobj; + + channel->mmap_ring_buffer = hv_mmap_ring_buffer; + channel->ring_sysfs_visible = true; + + return sysfs_update_group(kobj, &vmbus_chan_group); +} +EXPORT_SYMBOL_GPL(hv_create_ring_sysfs); + +/** + * hv_remove_ring_sysfs() - remove ring sysfs entry corresponding to ring buffers for a channel. + * @channel: Pointer to vmbus_channel structure + * + * Hide "ring" sysfs for a channel by changing its is_visible attribute and updating sysfs group. + * + * Returns 0 on success or error code on failure. + */ +int hv_remove_ring_sysfs(struct vmbus_channel *channel) +{ + struct kobject *kobj = &channel->kobj; + int ret; + + channel->ring_sysfs_visible = false; + ret = sysfs_update_group(kobj, &vmbus_chan_group); + channel->mmap_ring_buffer = NULL; + return ret; +} +EXPORT_SYMBOL_GPL(hv_remove_ring_sysfs); + /* * vmbus_add_channel_kobj - setup a sub-directory under device/channels */ diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 1b19b5647495..69c1df0f4ca5 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -131,15 +131,12 @@ static void hv_uio_rescind(struct vmbus_channel *channel) vmbus_device_unregister(channel->device_obj); } -/* Sysfs API to allow mmap of the ring buffers +/* Function used for mmap of ring buffer sysfs interface. * The ring buffer is allocated as contiguous memory by vmbus_open */ -static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, - const struct bin_attribute *attr, - struct vm_area_struct *vma) +static int +hv_uio_ring_mmap(struct vmbus_channel *channel, struct vm_area_struct *vma) { - struct vmbus_channel *channel - = container_of(kobj, struct vmbus_channel, kobj); void *ring_buffer = page_address(channel->ringbuffer_page); if (channel->state != CHANNEL_OPENED_STATE) @@ -149,15 +146,6 @@ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, channel->ringbuffer_pagecount << PAGE_SHIFT); } -static const struct bin_attribute ring_buffer_bin_attr = { - .attr = { - .name = "ring", - .mode = 0600, - }, - .size = 2 * SZ_2M, - .mmap = hv_uio_ring_mmap, -}; - /* Callback from VMBUS subsystem when new channel created. */ static void hv_uio_new_channel(struct vmbus_channel *new_sc) @@ -178,8 +166,7 @@ hv_uio_new_channel(struct vmbus_channel *new_sc) /* Disable interrupts on sub channel */ new_sc->inbound.ring_buffer->interrupt_mask = 1; set_channel_read_mode(new_sc, HV_CALL_ISR); - - ret = sysfs_create_bin_file(&new_sc->kobj, &ring_buffer_bin_attr); + ret = hv_create_ring_sysfs(new_sc, hv_uio_ring_mmap); if (ret) { dev_err(device, "sysfs create ring bin file failed; %d\n", ret); vmbus_close(new_sc); @@ -350,10 +337,18 @@ hv_uio_probe(struct hv_device *dev, goto fail_close; } - ret = sysfs_create_bin_file(&channel->kobj, &ring_buffer_bin_attr); - if (ret) - dev_notice(&dev->device, - "sysfs create ring bin file failed; %d\n", ret); + /* + * This internally calls sysfs_update_group, which returns a non-zero value if it executes + * before sysfs_create_group. This is expected as the 'ring' will be created later in + * vmbus_device_register() -> vmbus_add_channel_kobj(). Thus, no need to check the return + * value and print warning. + * + * Creating/exposing sysfs in driver probe is not encouraged as it can lead to race + * conditions with userspace. For backward compatibility, "ring" sysfs could not be removed + * or decoupled from uio_hv_generic probe. Userspace programs can make use of inotify + * APIs to make sure that ring is created. + */ + hv_create_ring_sysfs(channel, hv_uio_ring_mmap); hv_set_drvdata(dev, pdata); @@ -375,7 +370,7 @@ hv_uio_remove(struct hv_device *dev) if (!pdata) return; - sysfs_remove_bin_file(&dev->channel->kobj, &ring_buffer_bin_attr); + hv_remove_ring_sysfs(dev->channel); uio_unregister_device(&pdata->info); hv_uio_cleanup(dev, pdata); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 675959fb97ba..d6ffe01962c2 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1002,6 +1002,12 @@ struct vmbus_channel { /* The max size of a packet on this channel */ u32 max_pkt_size; + + /* function to mmap ring buffer memory to the channel's sysfs ring attribute */ + int (*mmap_ring_buffer)(struct vmbus_channel *channel, struct vm_area_struct *vma); + + /* boolean to control visibility of sysfs for ring buffer */ + bool ring_sysfs_visible; }; #define lock_requestor(channel, flags) \ From 65995e97a1caacf0024bebda3332b8d1f0f443c4 Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Fri, 2 May 2025 13:18:11 +0530 Subject: [PATCH 364/559] Drivers: hv: Make the sysfs node size for the ring buffer dynamic The ring buffer size varies across VMBus channels. The size of sysfs node for the ring buffer is currently hardcoded to 4 MB. Userspace clients either use fstat() or hardcode this size for doing mmap(). To address this, make the sysfs node size dynamic to reflect the actual ring buffer size for each channel. This will ensure that fstat() on ring sysfs node always returns the correct size of ring buffer. Reviewed-by: Michael Kelley Tested-by: Michael Kelley Reviewed-by: Dexuan Cui Signed-off-by: Naman Jain Link: https://lore.kernel.org/r/20250502074811.2022-3-namjain@linux.microsoft.com Signed-off-by: Greg Kroah-Hartman --- drivers/hv/vmbus_drv.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 0f16a83cc2d6..e3d51a316316 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1820,7 +1820,6 @@ static struct bin_attribute chan_attr_ring_buffer = { .name = "ring", .mode = 0600, }, - .size = 2 * SZ_2M, .mmap = hv_mmap_ring_buffer_wrapper, }; static struct attribute *vmbus_chan_attrs[] = { @@ -1880,11 +1879,21 @@ static umode_t vmbus_chan_bin_attr_is_visible(struct kobject *kobj, return attr->attr.mode; } +static size_t vmbus_chan_bin_size(struct kobject *kobj, + const struct bin_attribute *bin_attr, int a) +{ + const struct vmbus_channel *channel = + container_of(kobj, struct vmbus_channel, kobj); + + return channel->ringbuffer_pagecount << PAGE_SHIFT; +} + static const struct attribute_group vmbus_chan_group = { .attrs = vmbus_chan_attrs, .bin_attrs = vmbus_chan_bin_attrs, .is_visible = vmbus_chan_attr_is_visible, .is_bin_visible = vmbus_chan_bin_attr_is_visible, + .bin_size = vmbus_chan_bin_size, }; static const struct kobj_type vmbus_chan_ktype = { From b662b162c3d06f120749eea0351ec9317d9dd905 Mon Sep 17 00:00:00 2001 From: Feng Jiang Date: Wed, 9 Apr 2025 09:46:33 +0800 Subject: [PATCH 365/559] drm: Fix potential overflow issue in event_string array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When calling scnprintf() to append recovery method to event_string, the second argument should be `sizeof(event_string) - len`, otherwise there is a potential overflow problem. Fixes: b7cf9f4ac1b8 ("drm: Introduce device wedged event") Signed-off-by: Feng Jiang Reviewed-by: André Almeida Reviewed-by: Raag Jadav Link: https://lore.kernel.org/r/20250409014633.31303-1-jiangfeng@kylinos.cn Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/drm_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 17fc5dc708f4..60e5ac179c15 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -549,7 +549,7 @@ int drm_dev_wedged_event(struct drm_device *dev, unsigned long method) if (drm_WARN_ONCE(dev, !recovery, "invalid recovery method %u\n", opt)) break; - len += scnprintf(event_string + len, sizeof(event_string), "%s,", recovery); + len += scnprintf(event_string + len, sizeof(event_string) - len, "%s,", recovery); } if (recovery) From 0c314cda93258cd1f0055a278a6576b5d4aeabf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 30 Apr 2025 11:20:13 +0200 Subject: [PATCH 366/559] arm64: vdso: Work around invalid absolute relocations from GCC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All vDSO code needs to be completely position independent. Symbol references are marked as hidden so the compiler emits PC-relative relocations. However GCC emits absolute relocations for symbol-relative references with an offset >= 64KiB. After recent refactorings in the vDSO code this is the case in __arch_get_vdso_u_timens_data() with a page size of 64KiB. Work around the issue by preventing the optimizer from seeing the offsets. Fixes: 83a2a6b8cfc5 ("vdso/gettimeofday: Prepare do_hres_timens() for introduction of struct vdso_clock") Reported-by: Jan Stancek Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Catalin Marinas Link: https://lore.kernel.org/all/20250430-vdso-absolute-reloc-v2-1-5efcc3bc4b26@linutronix.de Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120002 Closes: https://lore.kernel.org/lkml/aApGPAoctq_eoE2g@t14ultra/ --- arch/arm64/include/asm/vdso/gettimeofday.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 92a2b59a9f3d..3322c7047d84 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -99,6 +99,19 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return res; } +#if IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) +static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void) +{ + const struct vdso_time_data *ret = &vdso_u_time_data; + + /* Work around invalid absolute relocations */ + OPTIMIZER_HIDE_VAR(ret); + + return ret; +} +#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data +#endif /* IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) */ + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ From 38a05c0b87833f5b188ae43b428b1f792df2b384 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Fri, 2 May 2025 13:22:28 +0200 Subject: [PATCH 367/559] irqchip/qcom-mpm: Prevent crash when trying to handle non-wake GPIOs On Qualcomm chipsets not all GPIOs are wakeup capable. Those GPIOs do not have a corresponding MPM pin and should not be handled inside the MPM driver. The IRQ domain hierarchy is always applied, so it's required to explicitly disconnect the hierarchy for those. The pinctrl-msm driver marks these with GPIO_NO_WAKE_IRQ. qcom-pdc has a check for this, but irq-qcom-mpm is currently missing the check. This is causing crashes when setting up interrupts for non-wake GPIOs: root@rb1:~# gpiomon -c gpiochip1 10 irq: IRQ159: trimming hierarchy from :soc@0:interrupt-controller@f200000-1 Unable to handle kernel paging request at virtual address ffff8000a1dc3820 Hardware name: Qualcomm Technologies, Inc. Robotics RB1 (DT) pc : mpm_set_type+0x80/0xcc lr : mpm_set_type+0x5c/0xcc Call trace: mpm_set_type+0x80/0xcc (P) qcom_mpm_set_type+0x64/0x158 irq_chip_set_type_parent+0x20/0x38 msm_gpio_irq_set_type+0x50/0x530 __irq_set_trigger+0x60/0x184 __setup_irq+0x304/0x6bc request_threaded_irq+0xc8/0x19c edge_detector_setup+0x260/0x364 linereq_create+0x420/0x5a8 gpio_ioctl+0x2d4/0x6c0 Fix this by copying the check for GPIO_NO_WAKE_IRQ from qcom-pdc.c, so that MPM is removed entirely from the hierarchy for non-wake GPIOs. Fixes: a6199bb514d8 ("irqchip: Add Qualcomm MPM controller driver") Reported-by: Alexey Klimov Signed-off-by: Stephan Gerhold Signed-off-by: Thomas Gleixner Tested-by: Alexey Klimov Reviewed-by: Bartosz Golaszewski Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250502-irq-qcom-mpm-fix-no-wake-v1-1-8a1eafcd28d4@linaro.org --- drivers/irqchip/irq-qcom-mpm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c index 7942d8eb3d00..f772deb9cba5 100644 --- a/drivers/irqchip/irq-qcom-mpm.c +++ b/drivers/irqchip/irq-qcom-mpm.c @@ -227,6 +227,9 @@ static int qcom_mpm_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; + if (pin == GPIO_NO_WAKE_IRQ) + return irq_domain_disconnect_hierarchy(domain, virq); + ret = irq_domain_set_hwirq_and_chip(domain, virq, pin, &qcom_mpm_chip, priv); if (ret) From 35e4079bf1a2570abffce6ababa631afcf8ea0e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Wed, 30 Apr 2025 17:51:52 -0300 Subject: [PATCH 368/559] drm/v3d: Add job to pending list if the reset was skipped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a CL/CSD job times out, we check if the GPU has made any progress since the last timeout. If so, instead of resetting the hardware, we skip the reset and let the timer get rearmed. This gives long-running jobs a chance to complete. However, when `timedout_job()` is called, the job in question is removed from the pending list, which means it won't be automatically freed through `free_job()`. Consequently, when we skip the reset and keep the job running, the job won't be freed when it finally completes. This situation leads to a memory leak, as exposed in [1] and [2]. Similarly to commit 704d3d60fec4 ("drm/etnaviv: don't block scheduler when GPU is still active"), this patch ensures the job is put back on the pending list when extending the timeout. Cc: stable@vger.kernel.org # 6.0 Reported-by: Daivik Bhatia Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12227 [1] Closes: https://github.com/raspberrypi/linux/issues/6817 [2] Reviewed-by: Iago Toral Quiroga Acked-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250430210643.57924-1-mcanal@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/v3d/v3d_sched.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 4a7701a33cf8..eb35482f6fb5 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -744,11 +744,16 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) return DRM_GPU_SCHED_STAT_NOMINAL; } -/* If the current address or return address have changed, then the GPU - * has probably made progress and we should delay the reset. This - * could fail if the GPU got in an infinite loop in the CL, but that - * is pretty unlikely outside of an i-g-t testcase. - */ +static void +v3d_sched_skip_reset(struct drm_sched_job *sched_job) +{ + struct drm_gpu_scheduler *sched = sched_job->sched; + + spin_lock(&sched->job_list_lock); + list_add(&sched_job->list, &sched->pending_list); + spin_unlock(&sched->job_list_lock); +} + static enum drm_gpu_sched_stat v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 *timedout_ctca, u32 *timedout_ctra) @@ -758,9 +763,16 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q)); u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q)); + /* If the current address or return address have changed, then the GPU + * has probably made progress and we should delay the reset. This + * could fail if the GPU got in an infinite loop in the CL, but that + * is pretty unlikely outside of an i-g-t testcase. + */ if (*timedout_ctca != ctca || *timedout_ctra != ctra) { *timedout_ctca = ctca; *timedout_ctra = ctra; + + v3d_sched_skip_reset(sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -800,11 +812,13 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job) struct v3d_dev *v3d = job->base.v3d; u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver)); - /* If we've made progress, skip reset and let the timer get - * rearmed. + /* If we've made progress, skip reset, add the job to the pending + * list, and let the timer get rearmed. */ if (job->timedout_batches != batches) { job->timedout_batches = batches; + + v3d_sched_skip_reset(sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } From 5fea0c6c0ebe0a645c247f29fc683852cf51af70 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 28 Apr 2025 13:55:31 -0500 Subject: [PATCH 369/559] KVM: SVM: Update dump_ghcb() to use the GHCB snapshot fields Commit 4e15a0ddc3ff ("KVM: SEV: snapshot the GHCB before accessing it") updated the SEV code to take a snapshot of the GHCB before using it. But the dump_ghcb() function wasn't updated to use the snapshot locations. This results in incorrect output from dump_ghcb() for the "is_valid" and "valid_bitmap" fields. Update dump_ghcb() to use the proper locations. Fixes: 4e15a0ddc3ff ("KVM: SEV: snapshot the GHCB before accessing it") Signed-off-by: Tom Lendacky Reviewed-by: Liam Merwick Link: https://lore.kernel.org/r/8f03878443681496008b1b37b7c4bf77a342b459.1745866531.git.thomas.lendacky@amd.com [sean: add comment and snapshot qualifier] Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/sev.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0bc708ee2788..a7a7dc507336 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3173,9 +3173,14 @@ skip_vmsa_free: kvfree(svm->sev_es.ghcb_sa); } +static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control) +{ + return (((u64)control->exit_code_hi) << 32) | control->exit_code; +} + static void dump_ghcb(struct vcpu_svm *svm) { - struct ghcb *ghcb = svm->sev_es.ghcb; + struct vmcb_control_area *control = &svm->vmcb->control; unsigned int nbits; /* Re-use the dump_invalid_vmcb module parameter */ @@ -3184,18 +3189,24 @@ static void dump_ghcb(struct vcpu_svm *svm) return; } - nbits = sizeof(ghcb->save.valid_bitmap) * 8; + nbits = sizeof(svm->sev_es.valid_bitmap) * 8; - pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa); + /* + * Print KVM's snapshot of the GHCB values that were (unsuccessfully) + * used to handle the exit. If the guest has since modified the GHCB + * itself, dumping the raw GHCB won't help debug why KVM was unable to + * handle the VMGEXIT that KVM observed. + */ + pr_err("GHCB (GPA=%016llx) snapshot:\n", svm->vmcb->control.ghcb_gpa); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code", - ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb)); + kvm_ghcb_get_sw_exit_code(control), kvm_ghcb_sw_exit_code_is_valid(svm)); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1", - ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb)); + control->exit_info_1, kvm_ghcb_sw_exit_info_1_is_valid(svm)); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2", - ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb)); + control->exit_info_2, kvm_ghcb_sw_exit_info_2_is_valid(svm)); pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch", - ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb)); - pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap); + svm->sev_es.sw_scratch, kvm_ghcb_sw_scratch_is_valid(svm)); + pr_err("%-20s%*pb\n", "valid_bitmap", nbits, svm->sev_es.valid_bitmap); } static void sev_es_sync_to_ghcb(struct vcpu_svm *svm) @@ -3266,11 +3277,6 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } -static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control) -{ - return (((u64)control->exit_code_hi) << 32) | control->exit_code; -} - static int sev_es_validate_vmgexit(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; From 9129633d568edd36aa22bf703b12835153cec985 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 30 Apr 2025 15:09:54 -0700 Subject: [PATCH 370/559] KVM: x86/mmu: Prevent installing hugepages when mem attributes are changing When changing memory attributes on a subset of a potential hugepage, add the hugepage to the invalidation range tracking to prevent installing a hugepage until the attributes are fully updated. Like the actual hugepage tracking updates in kvm_arch_post_set_memory_attributes(), process only the head and tail pages, as any potential hugepages that are entirely covered by the range will already be tracked. Note, only hugepage chunks whose current attributes are NOT mixed need to be added to the invalidation set, as mixed attributes already prevent installing a hugepage, and it's perfectly safe to install a smaller mapping for a gfn whose attributes aren't changing. Fixes: 8dd2eee9d526 ("KVM: x86/mmu: Handle page fault for private memory") Cc: stable@vger.kernel.org Reported-by: Michael Roth Tested-by: Michael Roth Link: https://lore.kernel.org/r/20250430220954.522672-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 89 ++++++++++++++++++++++++++++++------------ 1 file changed, 63 insertions(+), 26 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 387464ebe8e8..8d1b632e33d2 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7670,32 +7670,6 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm) } #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES -bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, - struct kvm_gfn_range *range) -{ - /* - * Zap SPTEs even if the slot can't be mapped PRIVATE. KVM x86 only - * supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM - * can simply ignore such slots. But if userspace is making memory - * PRIVATE, then KVM must prevent the guest from accessing the memory - * as shared. And if userspace is making memory SHARED and this point - * is reached, then at least one page within the range was previously - * PRIVATE, i.e. the slot's possible hugepage ranges are changing. - * Zapping SPTEs in this case ensures KVM will reassess whether or not - * a hugepage can be used for affected ranges. - */ - if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm))) - return false; - - /* Unmap the old attribute page. */ - if (range->arg.attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE) - range->attr_filter = KVM_FILTER_SHARED; - else - range->attr_filter = KVM_FILTER_PRIVATE; - - return kvm_unmap_gfn_range(kvm, range); -} - static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn, int level) { @@ -7714,6 +7688,69 @@ static void hugepage_set_mixed(struct kvm_memory_slot *slot, gfn_t gfn, lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG; } +bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, + struct kvm_gfn_range *range) +{ + struct kvm_memory_slot *slot = range->slot; + int level; + + /* + * Zap SPTEs even if the slot can't be mapped PRIVATE. KVM x86 only + * supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM + * can simply ignore such slots. But if userspace is making memory + * PRIVATE, then KVM must prevent the guest from accessing the memory + * as shared. And if userspace is making memory SHARED and this point + * is reached, then at least one page within the range was previously + * PRIVATE, i.e. the slot's possible hugepage ranges are changing. + * Zapping SPTEs in this case ensures KVM will reassess whether or not + * a hugepage can be used for affected ranges. + */ + if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm))) + return false; + + if (WARN_ON_ONCE(range->end <= range->start)) + return false; + + /* + * If the head and tail pages of the range currently allow a hugepage, + * i.e. reside fully in the slot and don't have mixed attributes, then + * add each corresponding hugepage range to the ongoing invalidation, + * e.g. to prevent KVM from creating a hugepage in response to a fault + * for a gfn whose attributes aren't changing. Note, only the range + * of gfns whose attributes are being modified needs to be explicitly + * unmapped, as that will unmap any existing hugepages. + */ + for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) { + gfn_t start = gfn_round_for_level(range->start, level); + gfn_t end = gfn_round_for_level(range->end - 1, level); + gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level); + + if ((start != range->start || start + nr_pages > range->end) && + start >= slot->base_gfn && + start + nr_pages <= slot->base_gfn + slot->npages && + !hugepage_test_mixed(slot, start, level)) + kvm_mmu_invalidate_range_add(kvm, start, start + nr_pages); + + if (end == start) + continue; + + if ((end + nr_pages) > range->end && + (end + nr_pages) <= (slot->base_gfn + slot->npages) && + !hugepage_test_mixed(slot, end, level)) + kvm_mmu_invalidate_range_add(kvm, end, end + nr_pages); + } + + /* Unmap the old attribute page. */ + if (range->arg.attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE) + range->attr_filter = KVM_FILTER_SHARED; + else + range->attr_filter = KVM_FILTER_PRIVATE; + + return kvm_unmap_gfn_range(kvm, range); +} + + + static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long attrs) { From 8ed12ab1319b2d8e4a529504777aacacf71371e4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 28 Apr 2025 19:43:22 +0200 Subject: [PATCH 371/559] x86/boot/sev: Support memory acceptance in the EFI stub under SVSM Commit: d54d610243a4 ("x86/boot/sev: Avoid shared GHCB page for early memory acceptance") provided a fix for SEV-SNP memory acceptance from the EFI stub when running at VMPL #0. However, that fix was insufficient for SVSM SEV-SNP guests running at VMPL >0, as those rely on a SVSM calling area, which is a shared buffer whose address is programmed into a SEV-SNP MSR, and the SEV init code that sets up this calling area executes much later during the boot. Given that booting via the EFI stub at VMPL >0 implies that the firmware has configured this calling area already, reuse it for performing memory acceptance in the EFI stub. Fixes: fcd042e86422 ("x86/sev: Perform PVALIDATE using the SVSM when not at VMPL0") Tested-by: Tom Lendacky Co-developed-by: Tom Lendacky Signed-off-by: Tom Lendacky Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: Cc: Dionna Amalie Glaze Cc: Kevin Loughlin Cc: linux-efi@vger.kernel.org Link: https://lore.kernel.org/r/20250428174322.2780170-2-ardb+git@google.com --- arch/x86/boot/compressed/mem.c | 5 +---- arch/x86/boot/compressed/sev.c | 40 ++++++++++++++++++++++++++++++++++ arch/x86/boot/compressed/sev.h | 2 ++ 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c index f676156d9f3d..0e9f84ab4bdc 100644 --- a/arch/x86/boot/compressed/mem.c +++ b/arch/x86/boot/compressed/mem.c @@ -34,14 +34,11 @@ static bool early_is_tdx_guest(void) void arch_accept_memory(phys_addr_t start, phys_addr_t end) { - static bool sevsnp; - /* Platform-specific memory-acceptance call goes here */ if (early_is_tdx_guest()) { if (!tdx_accept_memory(start, end)) panic("TDX: Failed to accept memory\n"); - } else if (sevsnp || (sev_get_status() & MSR_AMD64_SEV_SNP_ENABLED)) { - sevsnp = true; + } else if (early_is_sevsnp_guest()) { snp_accept_memory(start, end); } else { error("Cannot accept memory: unknown platform\n"); diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 89ba168f4f0f..0003e4416efd 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -645,3 +645,43 @@ void sev_prep_identity_maps(unsigned long top_level_pgt) sev_verify_cbit(top_level_pgt); } + +bool early_is_sevsnp_guest(void) +{ + static bool sevsnp; + + if (sevsnp) + return true; + + if (!(sev_get_status() & MSR_AMD64_SEV_SNP_ENABLED)) + return false; + + sevsnp = true; + + if (!snp_vmpl) { + unsigned int eax, ebx, ecx, edx; + + /* + * CPUID Fn8000_001F_EAX[28] - SVSM support + */ + eax = 0x8000001f; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + if (eax & BIT(28)) { + struct msr m; + + /* Obtain the address of the calling area to use */ + boot_rdmsr(MSR_SVSM_CAA, &m); + boot_svsm_caa = (void *)m.q; + boot_svsm_caa_pa = m.q; + + /* + * The real VMPL level cannot be discovered, but the + * memory acceptance routines make no use of that so + * any non-zero value suffices here. + */ + snp_vmpl = U8_MAX; + } + } + return true; +} diff --git a/arch/x86/boot/compressed/sev.h b/arch/x86/boot/compressed/sev.h index 4e463f33186d..d3900384b8ab 100644 --- a/arch/x86/boot/compressed/sev.h +++ b/arch/x86/boot/compressed/sev.h @@ -13,12 +13,14 @@ bool sev_snp_enabled(void); void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 sev_get_status(void); +bool early_is_sevsnp_guest(void); #else static inline bool sev_snp_enabled(void) { return false; } static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 sev_get_status(void) { return 0; } +static inline bool early_is_sevsnp_guest(void) { return false; } #endif From b53e523261bf058ea4a518b482222e7a277b186b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 4 May 2025 08:06:28 -0600 Subject: [PATCH 372/559] io_uring: always arm linked timeouts prior to issue There are a few spots where linked timeouts are armed, and not all of them adhere to the pre-arm, attempt issue, post-arm pattern. This can be problematic if the linked request returns that it will trigger a callback later, and does so before the linked timeout is fully armed. Consolidate all the linked timeout handling into __io_issue_sqe(), rather than have it spread throughout the various issue entry points. Cc: stable@vger.kernel.org Link: https://github.com/axboe/liburing/issues/1390 Reported-by: Chase Hiltz Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 50 ++++++++++++++------------------------------- 1 file changed, 15 insertions(+), 35 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index a2b256e96d5d..769814d71153 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -448,24 +448,6 @@ static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) return req->link; } -static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) -{ - if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT))) - return NULL; - return __io_prep_linked_timeout(req); -} - -static noinline void __io_arm_ltimeout(struct io_kiocb *req) -{ - io_queue_linked_timeout(__io_prep_linked_timeout(req)); -} - -static inline void io_arm_ltimeout(struct io_kiocb *req) -{ - if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT)) - __io_arm_ltimeout(req); -} - static void io_prep_async_work(struct io_kiocb *req) { const struct io_issue_def *def = &io_issue_defs[req->opcode]; @@ -518,7 +500,6 @@ static void io_prep_async_link(struct io_kiocb *req) static void io_queue_iowq(struct io_kiocb *req) { - struct io_kiocb *link = io_prep_linked_timeout(req); struct io_uring_task *tctx = req->tctx; BUG_ON(!tctx); @@ -543,8 +524,6 @@ static void io_queue_iowq(struct io_kiocb *req) trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work)); io_wq_enqueue(tctx->io_wq, &req->work); - if (link) - io_queue_linked_timeout(link); } static void io_req_queue_iowq_tw(struct io_kiocb *req, io_tw_token_t tw) @@ -1724,15 +1703,22 @@ static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def, return !!req->file; } +#define REQ_ISSUE_SLOW_FLAGS (REQ_F_CREDS | REQ_F_ARM_LTIMEOUT) + static inline int __io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags, const struct io_issue_def *def) { const struct cred *creds = NULL; + struct io_kiocb *link = NULL; int ret; - if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) - creds = override_creds(req->creds); + if (unlikely(req->flags & REQ_ISSUE_SLOW_FLAGS)) { + if ((req->flags & REQ_F_CREDS) && req->creds != current_cred()) + creds = override_creds(req->creds); + if (req->flags & REQ_F_ARM_LTIMEOUT) + link = __io_prep_linked_timeout(req); + } if (!def->audit_skip) audit_uring_entry(req->opcode); @@ -1742,8 +1728,12 @@ static inline int __io_issue_sqe(struct io_kiocb *req, if (!def->audit_skip) audit_uring_exit(!ret, ret); - if (creds) - revert_creds(creds); + if (unlikely(creds || link)) { + if (creds) + revert_creds(creds); + if (link) + io_queue_linked_timeout(link); + } return ret; } @@ -1769,7 +1759,6 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) if (ret == IOU_ISSUE_SKIP_COMPLETE) { ret = 0; - io_arm_ltimeout(req); /* If the op doesn't have a file, we're not polling for it */ if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue) @@ -1824,8 +1813,6 @@ void io_wq_submit_work(struct io_wq_work *work) else req_ref_get(req); - io_arm_ltimeout(req); - /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ if (atomic_read(&work->flags) & IO_WQ_WORK_CANCEL) { fail: @@ -1941,15 +1928,11 @@ struct file *io_file_get_normal(struct io_kiocb *req, int fd) static void io_queue_async(struct io_kiocb *req, int ret) __must_hold(&req->ctx->uring_lock) { - struct io_kiocb *linked_timeout; - if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) { io_req_defer_failed(req, ret); return; } - linked_timeout = io_prep_linked_timeout(req); - switch (io_arm_poll_handler(req, 0)) { case IO_APOLL_READY: io_kbuf_recycle(req, 0); @@ -1962,9 +1945,6 @@ static void io_queue_async(struct io_kiocb *req, int ret) case IO_APOLL_OK: break; } - - if (linked_timeout) - io_queue_linked_timeout(linked_timeout); } static inline void io_queue_sqe(struct io_kiocb *req) From de3629baf5a33af1919dec7136d643b0662e85ef Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 3 May 2025 18:24:01 +0200 Subject: [PATCH 373/559] parisc: Fix double SIGFPE crash Camm noticed that on parisc a SIGFPE exception will crash an application with a second SIGFPE in the signal handler. Dave analyzed it, and it happens because glibc uses a double-word floating-point store to atomically update function descriptors. As a result of lazy binding, we hit a floating-point store in fpe_func almost immediately. When the T bit is set, an assist exception trap occurs when when the co-processor encounters *any* floating-point instruction except for a double store of register %fr0. The latter cancels all pending traps. Let's fix this by clearing the Trap (T) bit in the FP status register before returning to the signal handler in userspace. The issue can be reproduced with this test program: root@parisc:~# cat fpe.c static void fpe_func(int sig, siginfo_t *i, void *v) { sigset_t set; sigemptyset(&set); sigaddset(&set, SIGFPE); sigprocmask(SIG_UNBLOCK, &set, NULL); printf("GOT signal %d with si_code %ld\n", sig, i->si_code); } int main() { struct sigaction action = { .sa_sigaction = fpe_func, .sa_flags = SA_RESTART|SA_SIGINFO }; sigaction(SIGFPE, &action, 0); feenableexcept(FE_OVERFLOW); return printf("%lf\n",1.7976931348623158E308*1.7976931348623158E308); } root@parisc:~# gcc fpe.c -lm root@parisc:~# ./a.out Floating point exception root@parisc:~# strace -f ./a.out execve("./a.out", ["./a.out"], 0xf9ac7034 /* 20 vars */) = 0 getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) = 0 ... rt_sigaction(SIGFPE, {sa_handler=0x1110a, sa_mask=[], sa_flags=SA_RESTART|SA_SIGINFO}, NULL, 8) = 0 --- SIGFPE {si_signo=SIGFPE, si_code=FPE_FLTOVF, si_addr=0x1078f} --- --- SIGFPE {si_signo=SIGFPE, si_code=FPE_FLTOVF, si_addr=0xf8f21237} --- +++ killed by SIGFPE +++ Floating point exception Signed-off-by: Helge Deller Suggested-by: John David Anglin Reported-by: Camm Maguire Cc: stable@vger.kernel.org --- arch/parisc/math-emu/driver.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 34495446e051..71829cb7bc81 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -97,9 +97,19 @@ handle_fpe(struct pt_regs *regs) memcpy(regs->fr, frcopy, sizeof regs->fr); if (signalcode != 0) { - force_sig_fault(signalcode >> 24, signalcode & 0xffffff, - (void __user *) regs->iaoq[0]); - return -1; + int sig = signalcode >> 24; + + if (sig == SIGFPE) { + /* + * Clear floating point trap bit to avoid trapping + * again on the first floating-point instruction in + * the userspace signal handler. + */ + regs->fr[0] &= ~(1ULL << 38); + } + force_sig_fault(sig, signalcode & 0xffffff, + (void __user *) regs->iaoq[0]); + return -1; } return signalcode ? -1 : 0; From df2e19a883fdea698cdbed4987987db999b19d58 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 4 May 2025 13:50:09 -0400 Subject: [PATCH 374/559] bcachefs: thread_with_stdio: fix spinning instead of exiting bch2_stdio_redirect_vprintf() was missing a check for stdio->done, i.e. exiting. This caused the thread attempting to print to spin, and since it was being called from the kthread ran by thread_with_stdio, the userspace side hung as well. Change it to return -EPIPE - i.e. writing to a pipe that's been closed. Reported-by: Jan Solanti Signed-off-by: Kent Overstreet --- fs/bcachefs/thread_with_file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c index dea73bc1cb51..314a24d15d4e 100644 --- a/fs/bcachefs/thread_with_file.c +++ b/fs/bcachefs/thread_with_file.c @@ -455,8 +455,10 @@ ssize_t bch2_stdio_redirect_vprintf(struct stdio_redirect *stdio, bool nonblocki struct stdio_buf *buf = &stdio->output; unsigned long flags; ssize_t ret; - again: + if (stdio->done) + return -EPIPE; + spin_lock_irqsave(&buf->lock, flags); ret = bch2_darray_vprintf(&buf->buf, GFP_NOWAIT, fmt, args); spin_unlock_irqrestore(&buf->lock, flags); From 92a09c47464d040866cf2b4cd052bc60555185fb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 May 2025 13:55:04 -0700 Subject: [PATCH 375/559] Linux 6.15-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 94be5dfb81fb..b29cc321ffd9 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* From 68025adfc13e6cd15eebe2293f77659f47daf13b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 4 Apr 2025 17:05:19 +0200 Subject: [PATCH 376/559] um: fix _nofault accesses Nathan reported [1] that when built with clang, the um kernel crashes pretty much immediately. This turned out to be an issue with the inline assembly I had added, when clang used %rax/%eax for both operands. Reorder it so current->thread.segv_continue is written first, and then the lifetime of _faulted won't have overlap with the lifetime of segv_continue. In the email thread Benjamin also pointed out that current->mm is only NULL for true kernel tasks, but we could do this for a userspace task, so the current->thread.segv_continue logic must be lifted out of the mm==NULL check. Finally, while looking at this, put a barrier() so the NULL assignment to thread.segv_continue cannot be reorder before the possibly faulting operation. Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/r/20250402221254.GA384@ax162 [1] Fixes: d1d7f01f7cd3 ("um: mark rodata read-only and implement _nofault accesses") Tested-by: Nathan Chancellor Signed-off-by: Johannes Berg --- arch/um/include/asm/uaccess.h | 2 ++ arch/um/kernel/trap.c | 26 ++++++++++++------------ arch/x86/um/shared/sysdep/faultinfo_32.h | 2 +- arch/x86/um/shared/sysdep/faultinfo_64.h | 2 +- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h index 3a08f9029a3f..1c6e0ae41b0c 100644 --- a/arch/um/include/asm/uaccess.h +++ b/arch/um/include/asm/uaccess.h @@ -55,6 +55,7 @@ do { \ goto err_label; \ } \ *((type *)dst) = get_unaligned((type *)(src)); \ + barrier(); \ current->thread.segv_continue = NULL; \ } while (0) @@ -66,6 +67,7 @@ do { \ if (__faulted) \ goto err_label; \ put_unaligned(*((type *)src), (type *)(dst)); \ + barrier(); \ current->thread.segv_continue = NULL; \ } while (0) diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index ce073150dc20..ef2272e92a43 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -225,20 +225,20 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, panic("Failed to sync kernel TLBs: %d", err); goto out; } - else if (current->mm == NULL) { - if (current->pagefault_disabled) { - if (!mc) { - show_regs(container_of(regs, struct pt_regs, regs)); - panic("Segfault with pagefaults disabled but no mcontext"); - } - if (!current->thread.segv_continue) { - show_regs(container_of(regs, struct pt_regs, regs)); - panic("Segfault without recovery target"); - } - mc_set_rip(mc, current->thread.segv_continue); - current->thread.segv_continue = NULL; - goto out; + else if (current->pagefault_disabled) { + if (!mc) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault with pagefaults disabled but no mcontext"); } + if (!current->thread.segv_continue) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault without recovery target"); + } + mc_set_rip(mc, current->thread.segv_continue); + current->thread.segv_continue = NULL; + goto out; + } + else if (current->mm == NULL) { show_regs(container_of(regs, struct pt_regs, regs)); panic("Segfault with no mm"); } diff --git a/arch/x86/um/shared/sysdep/faultinfo_32.h b/arch/x86/um/shared/sysdep/faultinfo_32.h index ab5c8e47049c..9193a7790a71 100644 --- a/arch/x86/um/shared/sysdep/faultinfo_32.h +++ b/arch/x86/um/shared/sysdep/faultinfo_32.h @@ -31,8 +31,8 @@ struct faultinfo { #define ___backtrack_faulted(_faulted) \ asm volatile ( \ - "mov $0, %0\n" \ "movl $__get_kernel_nofault_faulted_%=,%1\n" \ + "mov $0, %0\n" \ "jmp _end_%=\n" \ "__get_kernel_nofault_faulted_%=:\n" \ "mov $1, %0;" \ diff --git a/arch/x86/um/shared/sysdep/faultinfo_64.h b/arch/x86/um/shared/sysdep/faultinfo_64.h index 26fb4835d3e9..61e4ca1e0ab5 100644 --- a/arch/x86/um/shared/sysdep/faultinfo_64.h +++ b/arch/x86/um/shared/sysdep/faultinfo_64.h @@ -31,8 +31,8 @@ struct faultinfo { #define ___backtrack_faulted(_faulted) \ asm volatile ( \ - "mov $0, %0\n" \ "movq $__get_kernel_nofault_faulted_%=,%1\n" \ + "mov $0, %0\n" \ "jmp _end_%=\n" \ "__get_kernel_nofault_faulted_%=:\n" \ "mov $1, %0;" \ From 5214a9f6c0f56644acb9d2cbb58facf1856d322b Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Mon, 14 Apr 2025 11:59:33 +0200 Subject: [PATCH 377/559] x86/microcode: Consolidate the loader enablement checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidate the whole logic which determines whether the microcode loader should be enabled or not into a single function and call it everywhere. Well, almost everywhere - not in mk_early_pgtbl_32() because there the kernel is running without paging enabled and checking dis_ucode_ldr et al would require physical addresses and uglification of the code. But since this is 32-bit, the easier thing to do is to simply map the initrd unconditionally especially since that mapping is getting removed later anyway by zap_early_initrd_mapping() and avoid the uglification. In doing so, address the issue of old 486er machines without CPUID support, not booting current kernels. [ mingo: Fix no previous prototype for ‘microcode_loader_disabled’ [-Wmissing-prototypes] ] Fixes: 4c585af7180c1 ("x86/boot/32: Temporarily map initrd for microcode loading") Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/CANpbe9Wm3z8fy9HbgS8cuhoj0TREYEEkBipDuhgkWFvqX0UoVQ@mail.gmail.com --- arch/x86/include/asm/microcode.h | 2 + arch/x86/kernel/cpu/microcode/amd.c | 6 ++- arch/x86/kernel/cpu/microcode/core.c | 60 ++++++++++++++---------- arch/x86/kernel/cpu/microcode/intel.c | 2 +- arch/x86/kernel/cpu/microcode/internal.h | 1 - arch/x86/kernel/head32.c | 4 -- 6 files changed, 42 insertions(+), 33 deletions(-) diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 695e569159c1..be7cddc414e4 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -17,10 +17,12 @@ struct ucode_cpu_info { void load_ucode_bsp(void); void load_ucode_ap(void); void microcode_bsp_resume(void); +bool __init microcode_loader_disabled(void); #else static inline void load_ucode_bsp(void) { } static inline void load_ucode_ap(void) { } static inline void microcode_bsp_resume(void) { } +static inline bool __init microcode_loader_disabled(void) { return false; } #endif extern unsigned long initrd_start_early; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 4a10d35e70aa..96cb992d50ef 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -1098,15 +1098,17 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz static int __init save_microcode_in_initrd(void) { - unsigned int cpuid_1_eax = native_cpuid_eax(1); struct cpuinfo_x86 *c = &boot_cpu_data; struct cont_desc desc = { 0 }; + unsigned int cpuid_1_eax; enum ucode_state ret; struct cpio_data cp; - if (dis_ucode_ldr || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) + if (microcode_loader_disabled() || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) return 0; + cpuid_1_eax = native_cpuid_eax(1); + if (!find_blobs_in_containers(&cp)) return -EINVAL; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index b3658d11e7b6..079f046ee26d 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -41,8 +41,8 @@ #include "internal.h" -static struct microcode_ops *microcode_ops; -bool dis_ucode_ldr = true; +static struct microcode_ops *microcode_ops; +static bool dis_ucode_ldr = false; bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV); module_param(force_minrev, bool, S_IRUSR | S_IWUSR); @@ -84,6 +84,9 @@ static bool amd_check_current_patch_level(void) u32 lvl, dummy, i; u32 *levels; + if (x86_cpuid_vendor() != X86_VENDOR_AMD) + return false; + native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy); levels = final_levels; @@ -95,27 +98,29 @@ static bool amd_check_current_patch_level(void) return false; } -static bool __init check_loader_disabled_bsp(void) +bool __init microcode_loader_disabled(void) { - static const char *__dis_opt_str = "dis_ucode_ldr"; - const char *cmdline = boot_command_line; - const char *option = __dis_opt_str; - - /* - * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not - * completely accurate as xen pv guests don't see that CPUID bit set but - * that's good enough as they don't land on the BSP path anyway. - */ - if (native_cpuid_ecx(1) & BIT(31)) + if (dis_ucode_ldr) return true; - if (x86_cpuid_vendor() == X86_VENDOR_AMD) { - if (amd_check_current_patch_level()) - return true; - } - - if (cmdline_find_option_bool(cmdline, option) <= 0) - dis_ucode_ldr = false; + /* + * Disable when: + * + * 1) The CPU does not support CPUID. + * + * 2) Bit 31 in CPUID[1]:ECX is clear + * The bit is reserved for hypervisor use. This is still not + * completely accurate as XEN PV guests don't see that CPUID bit + * set, but that's good enough as they don't land on the BSP + * path anyway. + * + * 3) Certain AMD patch levels are not allowed to be + * overwritten. + */ + if (!have_cpuid_p() || + native_cpuid_ecx(1) & BIT(31) || + amd_check_current_patch_level()) + dis_ucode_ldr = true; return dis_ucode_ldr; } @@ -125,7 +130,10 @@ void __init load_ucode_bsp(void) unsigned int cpuid_1_eax; bool intel = true; - if (!have_cpuid_p()) + if (cmdline_find_option_bool(boot_command_line, "dis_ucode_ldr") > 0) + dis_ucode_ldr = true; + + if (microcode_loader_disabled()) return; cpuid_1_eax = native_cpuid_eax(1); @@ -146,9 +154,6 @@ void __init load_ucode_bsp(void) return; } - if (check_loader_disabled_bsp()) - return; - if (intel) load_ucode_intel_bsp(&early_data); else @@ -159,6 +164,11 @@ void load_ucode_ap(void) { unsigned int cpuid_1_eax; + /* + * Can't use microcode_loader_disabled() here - .init section + * hell. It doesn't have to either - the BSP variant must've + * parsed cmdline already anyway. + */ if (dis_ucode_ldr) return; @@ -810,7 +820,7 @@ static int __init microcode_init(void) struct cpuinfo_x86 *c = &boot_cpu_data; int error; - if (dis_ucode_ldr) + if (microcode_loader_disabled()) return -EINVAL; if (c->x86_vendor == X86_VENDOR_INTEL) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 819199bc0119..2a397da43923 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -389,7 +389,7 @@ static int __init save_builtin_microcode(void) if (xchg(&ucode_patch_va, NULL) != UCODE_BSP_LOADED) return 0; - if (dis_ucode_ldr || boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + if (microcode_loader_disabled() || boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return 0; uci.mc = get_microcode_blob(&uci, true); diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index 5df621752fef..50a9702ae4e2 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -94,7 +94,6 @@ static inline unsigned int x86_cpuid_family(void) return x86_family(eax); } -extern bool dis_ucode_ldr; extern bool force_minrev; #ifdef CONFIG_CPU_SUP_AMD diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index de001b2146ab..375f2d7f1762 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -145,10 +145,6 @@ void __init __no_stack_protector mk_early_pgtbl_32(void) *ptr = (unsigned long)ptep + PAGE_OFFSET; #ifdef CONFIG_MICROCODE_INITRD32 - /* Running on a hypervisor? */ - if (native_cpuid_ecx(1) & BIT(31)) - return; - params = (struct boot_params *)__pa_nodebug(&boot_params); if (!params->hdr.ramdisk_size || !params->hdr.ramdisk_image) return; From 936b73feab5cd7eae8fe3d08a7ac9b1f8ac68042 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Mon, 28 Apr 2025 11:35:55 -0700 Subject: [PATCH 378/559] drm/i915/slpc: Balance the inc/dec for num_waiters As seen in some recent failures, SLPC num_waiters value is < 0. This happens because the inc/dec are not balanced. We should skip decrement for the same conditions as the increment. Currently, we do that for power saving profile mode. This patch also ensures that num_waiters is incremented in the case min_softlimit is at boost freq. It ensures that we don't reduce the frequency while this request is in flight. v2: Add Fixes tags Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13598 Fixes: f864a29afc32 ("drm/i915/slpc: Optmize waitboost for SLPC") Fixes: 4a82ceb04ad4 ("drm/i915/slpc: Add sysfs for SLPC power profiles") Cc: Sk Anirban Reviewed-by: Sk Anirban Signed-off-by: Vinay Belgaumkar Signed-off-by: Daniele Ceraolo Spurio Link: https://lore.kernel.org/r/20250428183555.3250021-1-vinay.belgaumkar@intel.com (cherry picked from commit d26e55085f4b7a63677670db827541209257b313) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_rps.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 64e9317f58fb..71ee01d9ef64 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1001,6 +1001,10 @@ void intel_rps_dec_waiters(struct intel_rps *rps) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + /* Don't decrement num_waiters for req where increment was skipped */ + if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) + return; + intel_guc_slpc_dec_waiters(slpc); } else { atomic_dec(&rps->num_waiters); @@ -1029,11 +1033,15 @@ void intel_rps_boost(struct i915_request *rq) if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) return; - if (slpc->min_freq_softlimit >= slpc->boost_freq) - return; - /* Return if old value is non zero */ if (!atomic_fetch_inc(&slpc->num_waiters)) { + /* + * Skip queuing boost work if frequency is already boosted, + * but still increment num_waiters. + */ + if (slpc->min_freq_softlimit >= slpc->boost_freq) + return; + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", rq->fence.context, rq->fence.seqno); queue_work(rps_to_gt(rps)->i915->unordered_wq, From eb16b3727c05ed36420c90eca1e8f0e279514c1c Mon Sep 17 00:00:00 2001 From: Nylon Chen Date: Fri, 11 Apr 2025 15:38:49 +0800 Subject: [PATCH 379/559] riscv: misaligned: Add handling for ZCB instructions Add support for the Zcb extension's compressed half-word instructions (C.LHU, C.LH, and C.SH) in the RISC-V misaligned access trap handler. Signed-off-by: Zong Li Signed-off-by: Nylon Chen Fixes: 956d705dd279 ("riscv: Unaligned load/store handling for M_MODE") Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250411073850.3699180-2-nylon.chen@sifive.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/traps_misaligned.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 4354c87c0376..dde5d11dc1b5 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -88,6 +88,13 @@ #define INSN_MATCH_C_FSWSP 0xe002 #define INSN_MASK_C_FSWSP 0xe003 +#define INSN_MATCH_C_LHU 0x8400 +#define INSN_MASK_C_LHU 0xfc43 +#define INSN_MATCH_C_LH 0x8440 +#define INSN_MASK_C_LH 0xfc43 +#define INSN_MATCH_C_SH 0x8c00 +#define INSN_MASK_C_SH 0xfc43 + #define INSN_LEN(insn) ((((insn) & 0x3) < 0x3) ? 2 : 4) #if defined(CONFIG_64BIT) @@ -431,6 +438,13 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) fp = 1; len = 4; #endif + } else if ((insn & INSN_MASK_C_LHU) == INSN_MATCH_C_LHU) { + len = 2; + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LH) == INSN_MATCH_C_LH) { + len = 2; + shift = 8 * (sizeof(ulong) - len); + insn = RVC_RS2S(insn) << SH_RD; } else { regs->epc = epc; return -1; @@ -530,6 +544,9 @@ static int handle_scalar_misaligned_store(struct pt_regs *regs) len = 4; val.data_ulong = GET_F32_RS2C(insn, regs); #endif + } else if ((insn & INSN_MASK_C_SH) == INSN_MATCH_C_SH) { + len = 2; + val.data_ulong = GET_RS2S(insn, regs); } else { regs->epc = epc; return -1; From f5c84eff634ba003326aa034c414e2a9dcb7c6a7 Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Mon, 28 Apr 2025 22:36:26 +0800 Subject: [PATCH 380/559] loop: Add sanity check for read/write_iter Some file systems do not support read_iter/write_iter, such as selinuxfs in this issue. So before calling them, first confirm that the interface is supported and then call it. It is releavant in that vfs_iter_read/write have the check, and removal of their used caused szybot to be able to hit this issue. Fixes: f2fed441c69b ("loop: stop using vfs_iter__{read,write} for buffered I/O") Reported-by: syzbot+6af973a3b8dfd2faefdc@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6af973a3b8dfd2faefdc Signed-off-by: Lizhi Xu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250428143626.3318717-1-lizhi.xu@windriver.com Signed-off-by: Jens Axboe --- drivers/block/loop.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 46cba261075f..b8ba7de08753 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -505,6 +505,17 @@ static void loop_assign_backing_file(struct loop_device *lo, struct file *file) lo->lo_min_dio_size = loop_query_min_dio_size(lo); } +static int loop_check_backing_file(struct file *file) +{ + if (!file->f_op->read_iter) + return -EINVAL; + + if ((file->f_mode & FMODE_WRITE) && !file->f_op->write_iter) + return -EINVAL; + + return 0; +} + /* * loop_change_fd switched the backing store of a loopback device to * a new file. This is useful for operating system installers to free up @@ -526,6 +537,10 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (!file) return -EBADF; + error = loop_check_backing_file(file); + if (error) + return error; + /* suppress uevents while reconfiguring the device */ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1); @@ -963,6 +978,14 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, if (!file) return -EBADF; + + if ((mode & BLK_OPEN_WRITE) && !file->f_op->write_iter) + return -EINVAL; + + error = loop_check_backing_file(file); + if (error) + return error; + is_loop = is_loop_device(file); /* This is safe, since we have a reference from open(). */ From 94cff94634e506a4a44684bee1875d2dbf782722 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Apr 2025 15:31:16 +0200 Subject: [PATCH 381/559] clocksource/i8253: Use raw_spinlock_irqsave() in clockevent_i8253_disable() On x86 during boot, clockevent_i8253_disable() can be invoked via x86_late_time_init -> hpet_time_init() -> pit_timer_init() which happens with enabled interrupts. If some of the old i8253 hardware is actually used then lockdep will notice that i8253_lock is used in hard interrupt context. This causes lockdep to complain because it observed the lock being acquired with interrupts enabled and in hard interrupt context. Make clockevent_i8253_disable() acquire the lock with raw_spinlock_irqsave() to cure this. [ tglx: Massage change log and use guard() ] Fixes: c8c4076723dac ("x86/timer: Skip PIT initialization on modern chipsets") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250404133116.p-XRWJXf@linutronix.de --- drivers/clocksource/i8253.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c index 39f7c2d736d1..b603c25f3dfa 100644 --- a/drivers/clocksource/i8253.c +++ b/drivers/clocksource/i8253.c @@ -103,7 +103,7 @@ int __init clocksource_i8253_init(void) #ifdef CONFIG_CLKEVT_I8253 void clockevent_i8253_disable(void) { - raw_spin_lock(&i8253_lock); + guard(raw_spinlock_irqsave)(&i8253_lock); /* * Writing the MODE register should stop the counter, according to @@ -132,8 +132,6 @@ void clockevent_i8253_disable(void) outb_p(0, PIT_CH0); outb_p(0x30, PIT_MODE); - - raw_spin_unlock(&i8253_lock); } static int pit_shutdown(struct clock_event_device *evt) From 0db8a9a943e9b651952a62e6e985effb4161ac5e Mon Sep 17 00:00:00 2001 From: Konstantin Shkolnyy Date: Fri, 18 Apr 2025 10:20:14 -0500 Subject: [PATCH 382/559] s390/configs: Enable VDPA on Nvidia ConnectX-6 network card ConnectX-6 is the first VDPA-capable NIC. For earlier NICs, Nvidia implements a VDPA emulation in s/w, which hasn't been validated on s390. Add options necessary for VDPA to work. These options are also added because Fedora has them: CONFIG_VDPA_SIM CONFIG_VDPA_SIM_NET CONFIG_VDPA_SIM_BLOCK CONFIG_VDPA_USER CONFIG_VP_VDPA Signed-off-by: Konstantin Shkolnyy Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 8 ++++++++ arch/s390/configs/defconfig | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 6f2c9ce1b154..68ba3bedfe00 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -628,8 +628,16 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_MEM=m CONFIG_VIRTIO_INPUT=y +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m +CONFIG_MLX5_VDPA_NET=m +CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index f18a7d97ac21..57b34f9fabf5 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -618,8 +618,16 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_MEM=m CONFIG_VIRTIO_INPUT=y +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m +CONFIG_MLX5_VDPA_NET=m +CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y From d2b8111c22d7d82f28df574acc54aec44ce3d45c Mon Sep 17 00:00:00 2001 From: Konstantin Shkolnyy Date: Fri, 18 Apr 2025 10:20:15 -0500 Subject: [PATCH 383/559] s390/configs: Enable options required for TC flow offload While testing Open vSwitch with Nvidia ConnectX-6 NIC, it was noticed that it didn't offload TC flows into the NIC, and its log contained many messages such as: "failed to offload flow: No such file or directory: " and, upon enabling more versose logging, additionally: "received NAK error=2 - TC classifier not found" The options enabled here are listed as requirements in Nvidia online documentation, among other options that were already enabled. Now all options listed by Nvidia are enabled.. This option is also added because Fedora has it: CONFIG_NET_EMATCH Signed-off-by: Konstantin Shkolnyy Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 6 ++++++ arch/s390/configs/defconfig | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 68ba3bedfe00..193d251b4c8f 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -395,6 +395,9 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -405,6 +408,9 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GATE=m CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 57b34f9fabf5..ec8fdeba4f3e 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -385,6 +385,9 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -395,6 +398,9 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GATE=m CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y From ae952eea6f4a7e2193f8721a5366049946e012e7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 24 Apr 2025 17:07:01 +0200 Subject: [PATCH 384/559] s390/entry: Fix last breaking event handling in case of stack corruption In case of stack corruption stack_invalid() is called and the expectation is that register r10 contains the last breaking event address. This dependency is quite subtle and broke a couple of years ago without that anybody noticed. Fix this by getting rid of the dependency and read the last breaking event address from lowcore. Fixes: 56e62a737028 ("s390: convert to generic entry") Acked-by: Ilya Leoshkevich Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index dd291c9ad6a6..9980c17ba22d 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -602,7 +602,8 @@ SYM_CODE_START(stack_invalid) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_R8(64,%r11),0(%r14) - stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + GET_LC %r2 + mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_invalid From 833542b3e3d23f640aef6569ba1fd641bc195fa0 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 29 Apr 2025 18:21:01 +0200 Subject: [PATCH 385/559] s390/dcssblk: Fix build error with CONFIG_DAX=m and CONFIG_DCSSBLK=y After commit 653d7825c149 ("dcssblk: mark DAX broken, remove FS_DAX_LIMITED support") moved the "select DAX" from config DCSSBLK to the new config DCSSBLK_DAX, randconfig tests could result in build errors like this: s390-linux-ld: drivers/s390/block/dcssblk.o: in function `dcssblk_shared_store': drivers/s390/block/dcssblk.c:417: undefined reference to `kill_dax' s390-linux-ld: drivers/s390/block/dcssblk.c:418: undefined reference to `put_dax' This is because it's now possible to have CONFIG_DCSSBLK=y, but CONFIG_DAX=m. Fix this by adding "depends on DAX || DAX=n" to config DCSSBLK, to make it explicit that we want either no DAX, or the same "y/m" for both config DAX and DCSSBLK, similar to config BLK_DEV_DM. This also requires removing the "select DAX" from config DCSSBLK_DAX, or else there would be a recursive dependency detected. DCSSBLK_DAX is marked as BROKEN at the moment, and won't work well with DAX anyway, so it doesn't really matter if it is selected. Fixes: 653d7825c149 ("dcssblk: mark DAX broken, remove FS_DAX_LIMITED support") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504291604.pvjonhWX-lkp@intel.com/ Signed-off-by: Gerald Schaefer Signed-off-by: Heiko Carstens --- drivers/s390/block/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index 4bfe469c04aa..8c1c908d2c6e 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -5,7 +5,7 @@ comment "S/390 block device drivers" config DCSSBLK def_tristate m prompt "DCSSBLK support" - depends on S390 && BLOCK + depends on S390 && BLOCK && (DAX || DAX=n) help Support for dcss block device @@ -14,7 +14,6 @@ config DCSSBLK_DAX depends on DCSSBLK # requires S390 ZONE_DEVICE support depends on BROKEN - select DAX prompt "DCSSBLK DAX support" help Enable DAX operation for the dcss block device From 3a47b1e3cea247857aa48cfe3d0a07e989e6c57d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 30 Apr 2025 16:41:01 +0200 Subject: [PATCH 386/559] s390: Update defconfigs Just the regular update of all defconfigs. Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 14 +++++--------- arch/s390/configs/defconfig | 10 +++------- arch/s390/configs/zfcpdump_defconfig | 1 - 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 193d251b4c8f..24b22f6a9e99 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -38,7 +38,6 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y @@ -92,7 +91,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y CONFIG_SLUB_STATS=y @@ -668,7 +666,6 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=y CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -738,11 +735,10 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_HARDENED_USERCOPY=y -CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_LOCKDOWN_LSM=y @@ -755,6 +751,8 @@ CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y +CONFIG_FORTIFY_SOURCE=y +CONFIG_HARDENED_USERCOPY=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set @@ -770,7 +768,6 @@ CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m @@ -815,7 +812,6 @@ CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_HMAC_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m @@ -826,9 +822,9 @@ CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y +CONFIG_CRYPTO_KRB5=m +CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_RANDOM32_SELFTEST=y CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index ec8fdeba4f3e..2b8b42d569bc 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -36,7 +36,6 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y @@ -86,7 +85,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y # CONFIG_COMPAT_BRK is not set @@ -655,7 +653,6 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=m CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -725,6 +722,7 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y @@ -756,7 +754,6 @@ CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m @@ -802,7 +799,6 @@ CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_HMAC_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m @@ -813,10 +809,10 @@ CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y +CONFIG_CRYPTO_KRB5=m +CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 853b2326a171..8163c1702720 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -70,7 +70,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DEBUG_FS=y CONFIG_PANIC_ON_OOPS=y -# CONFIG_SCHED_DEBUG is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_RCU_TRACE is not set # CONFIG_FTRACE is not set From 6328bdc988d23201c700e1e7e04eb05a1149ac1e Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Mon, 5 May 2025 15:56:29 +0300 Subject: [PATCH 387/559] usb: xhci: Don't trust the EP Context cycle bit when moving HW dequeue VIA VL805 doesn't bother updating the EP Context cycle bit when the endpoint halts. This is seen by patching xhci_move_dequeue_past_td() to print the cycle bits of the EP Context and the TRB at hw_dequeue and then disconnecting a flash drive while reading it. Actual cycle state is random as expected, but the EP Context bit is always 1. This means that the cycle state produced by this function is wrong half the time, and then the endpoint stops working. Work around it by looking at the cycle bit of TD's end_trb instead of believing the Endpoint or Stream Context. Specifically: - rename cycle_found to hw_dequeue_found to avoid confusion - initialize new_cycle from td->end_trb instead of hw_dequeue - switch new_cycle toggling to happen after end_trb is found Now a workload which regularly stalls the device works normally for a few hours and clearly demonstrates the HW bug - the EP Context bit is not updated in a new cycle until Set TR Dequeue overwrites it: [ +0,000298] sd 10:0:0:0: [sdc] Attached SCSI disk [ +0,011758] cycle bits: TRB 1 EP Ctx 1 [ +5,947138] cycle bits: TRB 1 EP Ctx 1 [ +0,065731] cycle bits: TRB 0 EP Ctx 1 [ +0,064022] cycle bits: TRB 0 EP Ctx 0 [ +0,063297] cycle bits: TRB 0 EP Ctx 0 [ +0,069823] cycle bits: TRB 0 EP Ctx 0 [ +0,063390] cycle bits: TRB 1 EP Ctx 0 [ +0,063064] cycle bits: TRB 1 EP Ctx 1 [ +0,062293] cycle bits: TRB 1 EP Ctx 1 [ +0,066087] cycle bits: TRB 0 EP Ctx 1 [ +0,063636] cycle bits: TRB 0 EP Ctx 0 [ +0,066360] cycle bits: TRB 0 EP Ctx 0 Also tested on the buggy ASM1042 which moves EP Context dequeue to the next TRB after errors, one problem case addressed by the rework that implemented this loop. In this case hw_dequeue can be enqueue, so simply picking the cycle bit of TRB at hw_dequeue wouldn't work. Commit 5255660b208a ("xhci: add quirk for host controllers that don't update endpoint DCS") tried to solve the stale cycle problem, but it was more complex and got reverted due to a reported issue. Cc: Jonathan Bell Cc: Oliver Neukum Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250505125630.561699-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b906bc2eea5f..423bf3649570 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -699,7 +699,7 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci, int new_cycle; dma_addr_t addr; u64 hw_dequeue; - bool cycle_found = false; + bool hw_dequeue_found = false; bool td_last_trb_found = false; u32 trb_sct = 0; int ret; @@ -715,25 +715,24 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci, hw_dequeue = xhci_get_hw_deq(xhci, dev, ep_index, stream_id); new_seg = ep_ring->deq_seg; new_deq = ep_ring->dequeue; - new_cycle = hw_dequeue & 0x1; + new_cycle = le32_to_cpu(td->end_trb->generic.field[3]) & TRB_CYCLE; /* - * We want to find the pointer, segment and cycle state of the new trb - * (the one after current TD's end_trb). We know the cycle state at - * hw_dequeue, so walk the ring until both hw_dequeue and end_trb are - * found. + * Walk the ring until both the next TRB and hw_dequeue are found (don't + * move hw_dequeue back if it went forward due to a HW bug). Cycle state + * is loaded from a known good TRB, track later toggles to maintain it. */ do { - if (!cycle_found && xhci_trb_virt_to_dma(new_seg, new_deq) + if (!hw_dequeue_found && xhci_trb_virt_to_dma(new_seg, new_deq) == (dma_addr_t)(hw_dequeue & ~0xf)) { - cycle_found = true; + hw_dequeue_found = true; if (td_last_trb_found) break; } if (new_deq == td->end_trb) td_last_trb_found = true; - if (cycle_found && trb_is_link(new_deq) && + if (td_last_trb_found && trb_is_link(new_deq) && link_trb_toggles_cycle(new_deq)) new_cycle ^= 0x1; @@ -745,7 +744,7 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci, return -EINVAL; } - } while (!cycle_found || !td_last_trb_found); + } while (!hw_dequeue_found || !td_last_trb_found); /* Don't update the ring cycle state for the producer (us). */ addr = xhci_trb_virt_to_dma(new_seg, new_deq); From cab63934c33b12c0d1e9f4da7450928057f2c142 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 5 May 2025 15:56:30 +0300 Subject: [PATCH 388/559] xhci: dbc: Avoid event polling busyloop if pending rx transfers are inactive. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Event polling delay is set to 0 if there are any pending requests in either rx or tx requests lists. Checking for pending requests does not work well for "IN" transfers as the tty driver always queues requests to the list and TRBs to the ring, preparing to receive data from the host. This causes unnecessary busylooping and cpu hogging. Only set the event polling delay to 0 if there are pending tx "write" transfers, or if it was less than 10ms since last active data transfer in any direction. Cc: Łukasz Bartosik Fixes: fb18e5bb9660 ("xhci: dbc: poll at different rate depending on data transfer activity") Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250505125630.561699-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbgcap.c | 19 ++++++++++++++++--- drivers/usb/host/xhci-dbgcap.h | 3 +++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index fd7895b24367..0d4ce5734165 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -823,6 +823,7 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) { dma_addr_t deq; union xhci_trb *evt; + enum evtreturn ret = EVT_DONE; u32 ctrl, portsc; bool update_erdp = false; @@ -909,6 +910,7 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) break; case TRB_TYPE(TRB_TRANSFER): dbc_handle_xfer_event(dbc, evt); + ret = EVT_XFER_DONE; break; default: break; @@ -927,7 +929,7 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) lo_hi_writeq(deq, &dbc->regs->erdp); } - return EVT_DONE; + return ret; } static void xhci_dbc_handle_events(struct work_struct *work) @@ -936,6 +938,7 @@ static void xhci_dbc_handle_events(struct work_struct *work) struct xhci_dbc *dbc; unsigned long flags; unsigned int poll_interval; + unsigned long busypoll_timelimit; dbc = container_of(to_delayed_work(work), struct xhci_dbc, event_work); poll_interval = dbc->poll_interval; @@ -954,11 +957,21 @@ static void xhci_dbc_handle_events(struct work_struct *work) dbc->driver->disconnect(dbc); break; case EVT_DONE: - /* set fast poll rate if there are pending data transfers */ + /* + * Set fast poll rate if there are pending out transfers, or + * a transfer was recently processed + */ + busypoll_timelimit = dbc->xfer_timestamp + + msecs_to_jiffies(DBC_XFER_INACTIVITY_TIMEOUT); + if (!list_empty(&dbc->eps[BULK_OUT].list_pending) || - !list_empty(&dbc->eps[BULK_IN].list_pending)) + time_is_after_jiffies(busypoll_timelimit)) poll_interval = 0; break; + case EVT_XFER_DONE: + dbc->xfer_timestamp = jiffies; + poll_interval = 0; + break; default: dev_info(dbc->dev, "stop handling dbc events\n"); return; diff --git a/drivers/usb/host/xhci-dbgcap.h b/drivers/usb/host/xhci-dbgcap.h index 9dc8f4d8077c..47ac72c2286d 100644 --- a/drivers/usb/host/xhci-dbgcap.h +++ b/drivers/usb/host/xhci-dbgcap.h @@ -96,6 +96,7 @@ struct dbc_ep { #define DBC_WRITE_BUF_SIZE 8192 #define DBC_POLL_INTERVAL_DEFAULT 64 /* milliseconds */ #define DBC_POLL_INTERVAL_MAX 5000 /* milliseconds */ +#define DBC_XFER_INACTIVITY_TIMEOUT 10 /* milliseconds */ /* * Private structure for DbC hardware state: */ @@ -142,6 +143,7 @@ struct xhci_dbc { enum dbc_state state; struct delayed_work event_work; unsigned int poll_interval; /* ms */ + unsigned long xfer_timestamp; unsigned resume_required:1; struct dbc_ep eps[2]; @@ -187,6 +189,7 @@ struct dbc_request { enum evtreturn { EVT_ERR = -1, EVT_DONE, + EVT_XFER_DONE, EVT_GSER, EVT_DISC, }; From 844f766e02d0aba973d78f3ade38ad8bae399347 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 4 May 2025 15:01:34 -0400 Subject: [PATCH 389/559] bcachefs: Improve want_cached_ptr() If promote target isn't set, rebalance should still leave a cached copy on the faster device. Fall back to foreground_target if it's set, or allow a cached copy on any device if neither are set. Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index dca2b8425cc0..e597fb9c9823 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1056,8 +1056,9 @@ bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bke static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts, struct bch_extent_ptr *ptr) { - if (!opts->promote_target || - !bch2_dev_in_target(c, ptr->dev, opts->promote_target)) + unsigned target = opts->promote_target ?: opts->foreground_target; + + if (target && !bch2_dev_in_target(c, ptr->dev, target)) return false; struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); From 50a7b899a0d806f961edadfc3357cb6679826795 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 4 May 2025 16:31:40 -0400 Subject: [PATCH 390/559] bcachefs: Ensure proper write alignment There was a buggy version of bcachefs-tools which picked misaligned bucket sizes when formatting, and we're also about to do dynamic block sizes - which will allow picking logical block size or physical block size of the device per-write, allowing for better compression ratios at the cost of slightly worse write performance (i.e. forcing the device to do RMW or extra buffering). To account for this, tweak bch2_alloc_sectors_start() to properly align open_buckets to the blocksize of the write we're about to do. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index effafc3e0ced..7ec022e9361a 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1422,11 +1422,31 @@ alloc_done: wp->sectors_free = UINT_MAX; - open_bucket_for_each(c, &wp->ptrs, ob, i) + open_bucket_for_each(c, &wp->ptrs, ob, i) { + /* + * Ensure proper write alignment - either due to misaligned + * bucket sizes (from buggy bcachefs-tools), or writes that mix + * logical/physical alignment: + */ + struct bch_dev *ca = ob_dev(c, ob); + u64 offset = bucket_to_sector(ca, ob->bucket) + + ca->mi.bucket_size - + ob->sectors_free; + unsigned align = round_up(offset, block_sectors(c)) - offset; + + ob->sectors_free = max_t(int, 0, ob->sectors_free - align); + wp->sectors_free = min(wp->sectors_free, ob->sectors_free); + } wp->sectors_free = rounddown(wp->sectors_free, block_sectors(c)); + /* Did alignment use up space in an open_bucket? */ + if (unlikely(!wp->sectors_free)) { + bch2_alloc_sectors_done(c, wp); + goto retry; + } + BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX); return 0; From 7a69fa65718a7258aa89fca06b975f4357daeac3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 5 May 2025 14:13:21 -0400 Subject: [PATCH 391/559] bcachefs: Add missing barriers before wake_up_bit() wake_up() doesn't require a barrier - but wake_up_bit() does. This only affected non x86, and primarily lead to lost wakeups after btree node reads. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 9 ++++++++- fs/bcachefs/buckets.h | 1 + fs/bcachefs/ec.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 5fd4a58d2ad2..60782f3e5aec 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -41,6 +41,7 @@ void bch2_btree_node_io_unlock(struct btree *b) clear_btree_node_write_in_flight_inner(b); clear_btree_node_write_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); } @@ -1400,6 +1401,7 @@ start: printbuf_exit(&buf); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } @@ -1595,6 +1597,7 @@ fsck_err: printbuf_exit(&buf); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } @@ -1721,6 +1724,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, set_btree_node_read_error(b); bch2_btree_lost_data(c, b->c.btree_id); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); printbuf_exit(&buf); return; @@ -2061,8 +2065,10 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start if (new & (1U << BTREE_NODE_write_in_flight)) __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED|type); - else + else { + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); + } } static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time) @@ -2175,6 +2181,7 @@ static void btree_node_write_endio(struct bio *bio) } clear_btree_node_write_in_flight_inner(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner); INIT_WORK(&wb->work, btree_node_write_work); queue_work(c->btree_io_complete_wq, &wb->work); diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 8d75b27a1418..af1532de4a37 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -44,6 +44,7 @@ static inline void bucket_unlock(struct bucket *b) BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte); clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &b->lock); + smp_mb__after_atomic(); wake_up_bit((void *) &b->lock, BUCKET_LOCK_BITNR); } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 62d27e04d763..51893e1ee874 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -160,6 +160,7 @@ static inline void gc_stripe_unlock(struct gc_stripe *s) BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte); clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &s->lock); + smp_mb__after_atomic(); wake_up_bit((void *) &s->lock, BUCKET_LOCK_BITNR); } From aed4ccbf4595e08f3fa80c7faaf1d2188d61bc70 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 4 May 2025 18:46:16 -0400 Subject: [PATCH 392/559] bcachefs: fix hung task timeout in journal read Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 63cdf885c9e2..ded18a94ed02 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -19,6 +19,7 @@ #include #include +#include void bch2_journal_pos_from_member_info_set(struct bch_fs *c) { @@ -1262,7 +1263,8 @@ int bch2_journal_read(struct bch_fs *c, degraded = true; } - closure_sync(&jlist.cl); + while (closure_sync_timeout(&jlist.cl, sysctl_hung_task_timeout_secs * HZ / 2)) + ; if (jlist.ret) return jlist.ret; From 157dbc4a321f5bb6f8b6c724d12ba720a90f1a7c Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 5 May 2025 19:31:48 +0200 Subject: [PATCH 393/559] KVM: arm64: Fix uninitialized memcache pointer in user_mem_abort() Commit fce886a60207 ("KVM: arm64: Plumb the pKVM MMU in KVM") made the initialization of the local memcache variable in user_mem_abort() conditional, leaving a codepath where it is used uninitialized via kvm_pgtable_stage2_map(). This can fail on any path that requires a stage-2 allocation without transition via a permission fault or dirty logging. Fix this by making sure that memcache is always valid. Fixes: fce886a60207 ("KVM: arm64: Plumb the pKVM MMU in KVM") Signed-off-by: Sebastian Ott Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/kvmarm/3f5db4c7-ccce-fb95-595c-692fa7aad227@redhat.com/ Link: https://lore.kernel.org/r/20250505173148.33900-1-sebott@redhat.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 754f2fe0cc67..eeda92330ade 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1501,6 +1501,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } + if (!is_protected_kvm_enabled()) + memcache = &vcpu->arch.mmu_page_cache; + else + memcache = &vcpu->arch.pkvm_memcache; + /* * Permission faults just need to update the existing leaf entry, * and so normally don't require allocations from the memcache. The @@ -1510,13 +1515,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (!fault_is_perm || (logging_active && write_fault)) { int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); - if (!is_protected_kvm_enabled()) { - memcache = &vcpu->arch.mmu_page_cache; + if (!is_protected_kvm_enabled()) ret = kvm_mmu_topup_memory_cache(memcache, min_pages); - } else { - memcache = &vcpu->arch.pkvm_memcache; + else ret = topup_hyp_memcache(memcache, min_pages); - } + if (ret) return ret; } From 859c60276e12a3a18c65a3f9325e656a068c9b62 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 Apr 2025 12:43:26 +0100 Subject: [PATCH 394/559] KVM: arm64: Force HCR_EL2.xMO to 1 at all times in VHE mode We keep setting and clearing these bits depending on the role of the host kernel, mimicking what we do for nVHE. But that's actually pretty pointless, as we always want physical interrupts to make it to the host, at EL2. This has also two problems: - it prevents IRQs from being taken when these bits are cleared if the implementation has chosen to implement these bits as masks when HCR_EL2.{TGE,xMO}=={0,0} - it triggers a bad erratum on the AmpereOne HW, which catches fire on clearing these bits while an interrupt is being taken (AC03_CPU_36). Let's kill these two birds with a single stone, and permanently set the xMO bits when running VHE. This involves a bit of surgery on code paths that rely on flipping these bits on and off for other purposes. Note that the earliest setting of hcr_el2 (in the init_hcr_el2 macro) is left untouched as is runs extremely early, with interrupts disabled, and soon enough overwritten with the final value containing the xMO bits. Reported-by: D Scott Phillips Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20250429114326.3618875-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_arm.h | 2 +- arch/arm64/kvm/hyp/vgic-v3-sr.c | 36 +++++++++++++++++++------------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 974d72b5905b..bba4b0e93091 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -100,7 +100,7 @@ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) -#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) +#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO) #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) #define MPAMHCR_HOST_FLAGS 0 diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index ed363aa3027e..50aa8dbcae75 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -429,23 +429,27 @@ u64 __vgic_v3_get_gic_config(void) /* * To check whether we have a MMIO-based (GICv2 compatible) * CPU interface, we need to disable the system register - * view. To do that safely, we have to prevent any interrupt - * from firing (which would be deadly). + * view. * - * Note that this only makes sense on VHE, as interrupts are - * already masked for nVHE as part of the exception entry to - * EL2. - */ - if (has_vhe()) - flags = local_daif_save(); - - /* * Table 11-2 "Permitted ICC_SRE_ELx.SRE settings" indicates * that to be able to set ICC_SRE_EL1.SRE to 0, all the * interrupt overrides must be set. You've got to love this. + * + * As we always run VHE with HCR_xMO set, no extra xMO + * manipulation is required in that case. + * + * To safely disable SRE, we have to prevent any interrupt + * from firing (which would be deadly). This only makes sense + * on VHE, as interrupts are already masked for nVHE as part + * of the exception entry to EL2. */ - sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO); - isb(); + if (has_vhe()) { + flags = local_daif_save(); + } else { + sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO); + isb(); + } + write_gicreg(0, ICC_SRE_EL1); isb(); @@ -453,11 +457,13 @@ u64 __vgic_v3_get_gic_config(void) write_gicreg(sre, ICC_SRE_EL1); isb(); - sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0); - isb(); - if (has_vhe()) + if (has_vhe()) { local_daif_restore(flags); + } else { + sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0); + isb(); + } val = (val & ICC_SRE_EL1_SRE) ? 0 : (1ULL << 63); val |= read_gicreg(ICH_VTR_EL2); From 7af7cfbe78e23af0ac3cbe9b2b16da69f1412222 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 Apr 2025 12:41:16 +0100 Subject: [PATCH 395/559] KVM: arm64: Prevent userspace from disabling AArch64 support at any virtualisable EL A sorry excuse for a selftest is trying to disable AArch64 support. And yes, this goes as well as you can imagine. Let's forbid this sort of things. Normal userspace shouldn't get caught doing that. Signed-off-by: Marc Zyngier Reviewed-by: Ganapatrao Kulkarni Link: https://lore.kernel.org/r/20250429114117.3618800-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 005ad28f7306..5dde9285afc8 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1945,6 +1945,12 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, if ((hw_val & mpam_mask) == (user_val & mpam_mask)) user_val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + /* Fail the guest's request to disable the AA64 ISA at EL{0,1,2} */ + if (!FIELD_GET(ID_AA64PFR0_EL1_EL0, user_val) || + !FIELD_GET(ID_AA64PFR0_EL1_EL1, user_val) || + (vcpu_has_nv(vcpu) && !FIELD_GET(ID_AA64PFR0_EL1_EL2, user_val))) + return -EINVAL; + return set_id_reg(vcpu, rd, user_val); } From b60e285b6acdec6d24fbde36d59f6d806f91cdc6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 Apr 2025 12:41:17 +0100 Subject: [PATCH 396/559] KVM: arm64: selftest: Don't try to disable AArch64 support Trying to cut the branch you are sat on is pretty dumb. And so is trying to disable the instruction set you are executing on. Signed-off-by: Marc Zyngier Reviewed-by: Ganapatrao Kulkarni Link: https://lore.kernel.org/r/20250429114117.3618800-3-maz@kernel.org Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/arm64/set_id_regs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 322b9d3b0125..57708de2075d 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -129,10 +129,10 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0), REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 1), REG_FTR_END, }; From 9c618560994794d6f477e81f3b695fe799feec8a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 5 May 2025 15:52:57 -0400 Subject: [PATCH 397/559] bcachefs: Call bch2_fs_start before getting vfs superblock This reverts 1fdbe0b184c8 bcachefs: Make sure c->vfs_sb is set before starting fs switched up bch2_fs_get_tree() so that we got a superblock before calling bch2_fs_start, so that c->vfs_sb would always be initialized while the filesystem was active. This turned out not to be necessary, because blk_holder_ops were implemented using our own locking, not vfs locking. And this had the side effect of creating a super_block and doing our full recovery (including potentially fsck) before setting SB_BORN, which causes things like sync calls to hang until our recovery is finished. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 113db85b6ef9..b6801861c66f 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -2502,10 +2502,9 @@ static int bch2_fs_get_tree(struct fs_context *fc) bch2_opts_apply(&c->opts, opts); - /* - * need to initialise sb and set c->vfs_sb _before_ starting fs, - * for blk_holder_ops - */ + ret = bch2_fs_start(c); + if (ret) + goto err_stop_fs; sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c); ret = PTR_ERR_OR_ZERO(sb); @@ -2567,10 +2566,6 @@ got_sb: sb->s_shrink->seeks = 0; - ret = bch2_fs_start(c); - if (ret) - goto err_put_super; - #ifdef CONFIG_UNICODE sb->s_encoding = c->cf_encoding; #endif From 3769478610135e82b262640252d90f6efb05be71 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 28 Apr 2025 16:29:54 -0700 Subject: [PATCH 398/559] sch_htb: make htb_deactivate() idempotent Alan reported a NULL pointer dereference in htb_next_rb_node() after we made htb_qlen_notify() idempotent. It turns out in the following case it introduced some regression: htb_dequeue_tree(): |-> fq_codel_dequeue() |-> qdisc_tree_reduce_backlog() |-> htb_qlen_notify() |-> htb_deactivate() |-> htb_next_rb_node() |-> htb_deactivate() For htb_next_rb_node(), after calling the 1st htb_deactivate(), the clprio[prio]->ptr could be already set to NULL, which means htb_next_rb_node() is vulnerable here. For htb_deactivate(), although we checked qlen before calling it, in case of qlen==0 after qdisc_tree_reduce_backlog(), we may call it again which triggers the warning inside. To fix the issues here, we need to: 1) Make htb_deactivate() idempotent, that is, simply return if we already call it before. 2) Make htb_next_rb_node() safe against ptr==NULL. Many thanks to Alan for testing and for the reproducer. Fixes: 5ba8b837b522 ("sch_htb: make htb_qlen_notify() idempotent") Reported-by: Alan J. Wylie Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250428232955.1740419-2-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_htb.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 4b9a639b642e..14bf71f57057 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -348,7 +348,8 @@ static void htb_add_to_wait_tree(struct htb_sched *q, */ static inline void htb_next_rb_node(struct rb_node **n) { - *n = rb_next(*n); + if (*n) + *n = rb_next(*n); } /** @@ -609,8 +610,8 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) */ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) { - WARN_ON(!cl->prio_activity); - + if (!cl->prio_activity) + return; htb_deactivate_prios(q, cl); cl->prio_activity = 0; } @@ -1485,8 +1486,6 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; - if (!cl->prio_activity) - return; htb_deactivate(qdisc_priv(sch), cl); } @@ -1740,8 +1739,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, if (cl->parent) cl->parent->children--; - if (cl->prio_activity) - htb_deactivate(q, cl); + htb_deactivate(q, cl); if (cl->cmode != HTB_CAN_SEND) htb_safe_rb_erase(&cl->pq_node, @@ -1949,8 +1947,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* turn parent into inner node */ qdisc_purge_queue(parent->leaf.q); parent_qdisc = parent->leaf.q; - if (parent->prio_activity) - htb_deactivate(q, parent); + htb_deactivate(q, parent); /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { From 63890286f557aa5c4eed7e90a5a31658de8fdb4d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 28 Apr 2025 16:29:55 -0700 Subject: [PATCH 399/559] selftests/tc-testing: Add a test case to cover basic HTB+FQ_CODEL case Integrate the reproducer from Alan into TC selftests and use scapy to generate TCP traffic instead of relying on ping command. Cc: Alan J. Wylie Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250428232955.1740419-3-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 0843f6d37e9c..a951c0d33cd2 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -538,5 +538,40 @@ "$TC qdisc del dev $DUMMY handle 1:0 root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "62c4", + "name": "Test HTB with FQ_CODEL - basic functionality", + "category": [ + "qdisc", + "htb", + "fq_codel" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 root handle 1: htb default 11", + "$TC class add dev $DEV1 parent 1: classid 1:1 htb rate 10kbit", + "$TC class add dev $DEV1 parent 1:1 classid 1:11 htb rate 10kbit prio 0 quantum 1486", + "$TC qdisc add dev $DEV1 parent 1:11 fq_codel quantum 300 noecn", + "sleep 0.5" + ], + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/TCP(sport=12345, dport=80)" + }, + "cmdUnderTest": "$TC -s qdisc show dev $DEV1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DEV1 | grep -A 5 'qdisc fq_codel'", + "matchPattern": "Sent [0-9]+ bytes [0-9]+ pkt", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 handle 1: root" + ] } ] From 1e20324b23f0afba27997434fb978f1e4a1dbcb6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 30 Apr 2025 09:37:58 -0700 Subject: [PATCH 400/559] virtio-net: don't re-enable refill work too early when NAPI is disabled Commit 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx") fixed a deadlock between reconfig paths and refill work trying to disable the same NAPI instance. The refill work can't run in parallel with reconfig because trying to double-disable a NAPI instance causes a stall under the instance lock, which the reconfig path needs to re-enable the NAPI and therefore unblock the stalled thread. There are two cases where we re-enable refill too early. One is in the virtnet_set_queues() handler. We call it when installing XDP: virtnet_rx_pause_all(vi); ... virtnet_napi_tx_disable(..); ... virtnet_set_queues(..); ... virtnet_rx_resume_all(..); We want the work to be disabled until we call virtnet_rx_resume_all(), but virtnet_set_queues() kicks it before NAPIs were re-enabled. The other case is a more trivial case of mis-ordering in __virtnet_rx_resume() found by code inspection. Taking the spin lock in virtnet_set_queues() (requested during review) may be unnecessary as we are under rtnl_lock and so are all paths writing to ->refill_enabled. Acked-by: Michael S. Tsirkin Reviewed-by: Bui Quang Minh Fixes: 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx") Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()") Link: https://patch.msgid.link/20250430163758.3029367-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 848fab51dfa1..b5549d542c02 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3383,12 +3383,15 @@ static void __virtnet_rx_resume(struct virtnet_info *vi, bool refill) { bool running = netif_running(vi->dev); + bool schedule_refill = false; if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); - + schedule_refill = true; if (running) virtnet_napi_enable(rq); + + if (schedule_refill) + schedule_delayed_work(&vi->refill, 0); } static void virtnet_rx_resume_all(struct virtnet_info *vi) @@ -3728,8 +3731,10 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) succ: vi->curr_queue_pairs = queue_pairs; /* virtnet_open() will refill when device is going to up. */ - if (dev->flags & IFF_UP) + spin_lock_bh(&vi->refill_lock); + if (dev->flags & IFF_UP && vi->refill_enabled) schedule_delayed_work(&vi->refill, 0); + spin_unlock_bh(&vi->refill_lock); return 0; } From 4397684a292a71fbc1e815c3e283f7490ddce5ae Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 30 Apr 2025 09:38:36 -0700 Subject: [PATCH 401/559] virtio-net: free xsk_buffs on error in virtnet_xsk_pool_enable() The selftests added to our CI by Bui Quang Minh recently reveals that there is a mem leak on the error path of virtnet_xsk_pool_enable(): unreferenced object 0xffff88800a68a000 (size 2048): comm "xdp_helper", pid 318, jiffies 4294692778 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): __kvmalloc_node_noprof+0x402/0x570 virtnet_xsk_pool_enable+0x293/0x6a0 (drivers/net/virtio_net.c:5882) xp_assign_dev+0x369/0x670 (net/xdp/xsk_buff_pool.c:226) xsk_bind+0x6a5/0x1ae0 __sys_bind+0x15e/0x230 __x64_sys_bind+0x72/0xb0 do_syscall_64+0xc1/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f Acked-by: Jason Wang Fixes: e9f3962441c0 ("virtio_net: xsk: rx: support fill with xsk buffer") Link: https://patch.msgid.link/20250430163836.3029761-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b5549d542c02..f9e3e628ec4d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -5890,8 +5890,10 @@ static int virtnet_xsk_pool_enable(struct net_device *dev, hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, DMA_TO_DEVICE, 0); - if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) - return -ENOMEM; + if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) { + err = -ENOMEM; + goto err_free_buffs; + } err = xsk_pool_dma_map(pool, dma_dev, 0); if (err) @@ -5919,6 +5921,8 @@ err_rq: err_xsk_map: virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, DMA_TO_DEVICE, 0); +err_free_buffs: + kvfree(rq->xsk_buffs); return err; } From e66b0a8f048bc8590eb1047480f946898a3f80c9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 15 Apr 2025 09:52:30 +0200 Subject: [PATCH 402/559] i2c: omap: fix deprecated of_property_read_bool() use Using of_property_read_bool() for non-boolean properties is deprecated and results in a warning during runtime since commit c141ecc3cecd ("of: Warn when of_property_read_bool() is used on non-boolean properties"). Fixes: b6ef830c60b6 ("i2c: omap: Add support for setting mux") Cc: Jayesh Choudhary Signed-off-by: Johan Hovold Acked-by: Mukesh Kumar Savaliya Link: https://lore.kernel.org/r/20250415075230.16235-1-johan+linaro@kernel.org Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 16afb9ca19bb..876791d20ed5 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1454,7 +1454,7 @@ omap_i2c_probe(struct platform_device *pdev) (1000 * omap->speed / 8); } - if (of_property_read_bool(node, "mux-states")) { + if (of_property_present(node, "mux-states")) { struct mux_state *mux_state; mux_state = devm_mux_state_get(&pdev->dev, NULL); From c360eb0c3ccb95306704fd221442283ee82f1f58 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 30 Apr 2025 11:21:35 -0500 Subject: [PATCH 403/559] dt-bindings: net: ethernet-controller: Add informative text about RGMII delays Device Tree and Ethernet MAC driver writers often misunderstand RGMII delays. Rewrite the Normative section in terms of the PCB, is the PCB adding the 2ns delay. This meaning was previous implied by the definition, but often wrongly interpreted due to the ambiguous wording and looking at the definition from the wrong perspective. The new definition concentrates clearly on the hardware, and should be less ambiguous. Add an Informative section to the end of the binding describing in detail what the four RGMII delays mean. This expands on just the PCB meaning, adding in the implications for the MAC and PHY. Additionally, when the MAC or PHY needs to add a delay, which is software configuration, describe how Linux does this, in the hope of reducing errors. Make it clear other users of device tree binding may implement the software configuration in other ways while still conforming to the binding. Fixes: 9d3de3c58347 ("dt-bindings: net: Add YAML schemas for the generic Ethernet options") Signed-off-by: Andrew Lunn Acked-by: Conor Dooley Link: https://patch.msgid.link/20250430-v6-15-rc3-net-rgmii-delays-v2-1-099ae651d5e5@lunn.ch Signed-off-by: Jakub Kicinski --- .../bindings/net/ethernet-controller.yaml | 97 +++++++++++++++++-- 1 file changed, 90 insertions(+), 7 deletions(-) diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 45819b235800..a2d4c626f659 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -74,19 +74,17 @@ properties: - rev-rmii - moca - # RX and TX delays are added by the MAC when required + # RX and TX delays are provided by the PCB. See below - rgmii - # RGMII with internal RX and TX delays provided by the PHY, - # the MAC should not add the RX or TX delays in this case + # RX and TX delays are not provided by the PCB. This is the most + # frequent case. See below - rgmii-id - # RGMII with internal RX delay provided by the PHY, the MAC - # should not add an RX delay in this case + # TX delay is provided by the PCB. See below - rgmii-rxid - # RGMII with internal TX delay provided by the PHY, the MAC - # should not add an TX delay in this case + # RX delay is provided by the PCB. See below - rgmii-txid - rtbi - smii @@ -286,4 +284,89 @@ allOf: additionalProperties: true +# Informative +# =========== +# +# 'phy-modes' & 'phy-connection-type' properties 'rgmii', 'rgmii-id', +# 'rgmii-rxid', and 'rgmii-txid' are frequently used wrongly by +# developers. This informative section clarifies their usage. +# +# The RGMII specification requires a 2ns delay between the data and +# clock signals on the RGMII bus. How this delay is implemented is not +# specified. +# +# One option is to make the clock traces on the PCB longer than the +# data traces. A sufficiently difference in length can provide the 2ns +# delay. If both the RX and TX delays are implemented in this manner, +# 'rgmii' should be used, so indicating the PCB adds the delays. +# +# If the PCB does not add these delays via extra long traces, +# 'rgmii-id' should be used. Here, 'id' refers to 'internal delay', +# where either the MAC or PHY adds the delay. +# +# If only one of the two delays are implemented via extra long clock +# lines, either 'rgmii-rxid' or 'rgmii-txid' should be used, +# indicating the MAC or PHY should implement one of the delays +# internally, while the PCB implements the other delay. +# +# Device Tree describes hardware, and in this case, it describes the +# PCB between the MAC and the PHY, if the PCB implements delays or +# not. +# +# In practice, very few PCBs make use of extra long clock lines. Hence +# any RGMII phy mode other than 'rgmii-id' is probably wrong, and is +# unlikely to be accepted during review without details provided in +# the commit description and comments in the .dts file. +# +# When the PCB does not implement the delays, the MAC or PHY must. As +# such, this is software configuration, and so not described in Device +# Tree. +# +# The following describes how Linux implements the configuration of +# the MAC and PHY to add these delays when the PCB does not. As stated +# above, developers often get this wrong, and the aim of this section +# is reduce the frequency of these errors by Linux developers. Other +# users of the Device Tree may implement it differently, and still be +# consistent with both the normative and informative description +# above. +# +# By default in Linux, when using phylib/phylink, the MAC is expected +# to read the 'phy-mode' from Device Tree, not implement any delays, +# and pass the value to the PHY. The PHY will then implement delays as +# specified by the 'phy-mode'. The PHY should always be reconfigured +# to implement the needed delays, replacing any setting performed by +# strapping or the bootloader, etc. +# +# Experience to date is that all PHYs which implement RGMII also +# implement the ability to add or not add the needed delays. Hence +# this default is expected to work in all cases. Ignoring this default +# is likely to be questioned by Reviews, and require a strong argument +# to be accepted. +# +# There are a small number of cases where the MAC has hard coded +# delays which cannot be disabled. The 'phy-mode' only describes the +# PCB. The inability to disable the delays in the MAC does not change +# the meaning of 'phy-mode'. It does however mean that a 'phy-mode' of +# 'rgmii' is now invalid, it cannot be supported, since both the PCB +# and the MAC and PHY adding delays cannot result in a functional +# link. Thus the MAC should report a fatal error for any modes which +# cannot be supported. When the MAC implements the delay, it must +# ensure that the PHY does not also implement the same delay. So it +# must modify the phy-mode it passes to the PHY, removing the delay it +# has added. Failure to remove the delay will result in a +# non-functioning link. +# +# Sometimes there is a need to fine tune the delays. Often the MAC or +# PHY can perform this fine tuning. In the MAC node, the Device Tree +# properties 'rx-internal-delay-ps' and 'tx-internal-delay-ps' should +# be used to indicate fine tuning performed by the MAC. The values +# expected here are small. A value of 2000ps, i.e 2ns, and a phy-mode +# of 'rgmii' will not be accepted by Reviewers. +# +# If the PHY is to perform fine tuning, the properties +# 'rx-internal-delay-ps' and 'tx-internal-delay-ps' in the PHY node +# should be used. When the PHY is implementing delays, e.g. 'rgmii-id' +# these properties should have a value near to 2000ps. If the PCB is +# implementing delays, e.g. 'rgmii', a small value can be used to fine +# tune the delay added by the PCB. ... From 3e6a0243ff002ddbd7ee18a8974ae61d2e6ed00d Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 3 May 2025 00:57:52 +0200 Subject: [PATCH 404/559] gre: Fix again IPv6 link-local address generation. Use addrconf_addr_gen() to generate IPv6 link-local addresses on GRE devices in most cases and fall back to using add_v4_addrs() only in case the GRE configuration is incompatible with addrconf_addr_gen(). GRE used to use addrconf_addr_gen() until commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") restricted this use to gretap and ip6gretap devices, and created add_v4_addrs() (borrowed from SIT) for non-Ethernet GRE ones. The original problem came when commit 9af28511be10 ("addrconf: refuse isatap eui64 for INADDR_ANY") made __ipv6_isatap_ifid() fail when its addr parameter was 0. The commit says that this would create an invalid address, however, I couldn't find any RFC saying that the generated interface identifier would be wrong. Anyway, since gre over IPv4 devices pass their local tunnel address to __ipv6_isatap_ifid(), that commit broke their IPv6 link-local address generation when the local address was unspecified. Then commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") tried to fix that case by defining add_v4_addrs() and calling it to generate the IPv6 link-local address instead of using addrconf_addr_gen() (apart for gretap and ip6gretap devices, which would still use the regular addrconf_addr_gen(), since they have a MAC address). That broke several use cases because add_v4_addrs() isn't properly integrated into the rest of IPv6 Neighbor Discovery code. Several of these shortcomings have been fixed over time, but add_v4_addrs() remains broken on several aspects. In particular, it doesn't send any Router Sollicitations, so the SLAAC process doesn't start until the interface receives a Router Advertisement. Also, add_v4_addrs() mostly ignores the address generation mode of the interface (/proc/sys/net/ipv6/conf/*/addr_gen_mode), thus breaking the IN6_ADDR_GEN_MODE_RANDOM and IN6_ADDR_GEN_MODE_STABLE_PRIVACY cases. Fix the situation by using add_v4_addrs() only in the specific scenario where the normal method would fail. That is, for interfaces that have all of the following characteristics: * run over IPv4, * transport IP packets directly, not Ethernet (that is, not gretap interfaces), * tunnel endpoint is INADDR_ANY (that is, 0), * device address generation mode is EUI64. In all other cases, revert back to the regular addrconf_addr_gen(). Also, remove the special case for ip6gre interfaces in add_v4_addrs(), since ip6gre devices now always use addrconf_addr_gen() instead. Note: This patch was originally applied as commit 183185a18ff9 ("gre: Fix IPv6 link-local address generation."). However, it was then reverted by commit fc486c2d060f ("Revert "gre: Fix IPv6 link-local address generation."") because it uncovered another bug that ended up breaking net/forwarding/ip6gre_custom_multipath_hash.sh. That other bug has now been fixed by commit 4d0ab3a6885e ("ipv6: Start path selection from the first nexthop"). Therefore we can now revive this GRE patch (no changes since original commit 183185a18ff9 ("gre: Fix IPv6 link-local address generation."). Fixes: e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/a88cc5c4811af36007645d610c95102dccb360a6.1746225214.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9ba83f0c9928..c6b22170dc49 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3214,16 +3214,13 @@ static void add_v4_addrs(struct inet6_dev *idev) struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); - int scope, plen, offset = 0; + int scope, plen; u32 pflags = 0; ASSERT_RTNL(); memset(&addr, 0, sizeof(struct in6_addr)); - /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ - if (idev->dev->addr_len == sizeof(struct in6_addr)) - offset = sizeof(struct in6_addr) - 4; - memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { scope = IPV6_ADDR_COMPATv4; @@ -3534,7 +3531,13 @@ static void addrconf_gre_config(struct net_device *dev) return; } - if (dev->type == ARPHRD_ETHER) { + /* Generate the IPv6 link-local address using addrconf_addr_gen(), + * unless we have an IPv4 GRE device not bound to an IP address and + * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this + * case). Such devices fall back to add_v4_addrs() instead. + */ + if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { addrconf_addr_gen(idev, true); return; } From b6a6006b0e3d10e62c465613f07ff49268baedb1 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 3 May 2025 00:57:59 +0200 Subject: [PATCH 405/559] selftests: Add IPv6 link-local address generation tests for GRE devices. GRE devices have their special code for IPv6 link-local address generation that has been the source of several regressions in the past. Add selftest to check that all gre, ip6gre, gretap and ip6gretap get an IPv6 link-link local address in accordance with the net.ipv6.conf..addr_gen_mode sysctl. Note: This patch was originally applied as commit 6f50175ccad4 ("selftests: Add IPv6 link-local address generation tests for GRE devices."). However, it was then reverted by commit 355d940f4d5a ("Revert "selftests: Add IPv6 link-local address generation tests for GRE devices."") because the commit it depended on was going to be reverted. Now that the situation is resolved, we can add this selftest again (no changes since original patch, appart from context update in tools/testing/selftests/net/Makefile). Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/2c3a5733cb3a6e3119504361a9b9f89fda570a2d.1746225214.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + .../testing/selftests/net/gre_ipv6_lladdr.sh | 177 ++++++++++++++++++ 2 files changed, 178 insertions(+) create mode 100755 tools/testing/selftests/net/gre_ipv6_lladdr.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 124078b56fa4..70a38f485d4d 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -31,6 +31,7 @@ TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh +TEST_PROGS += gre_ipv6_lladdr.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += test_so_rcv.sh diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh new file mode 100755 index 000000000000..5b34f6e1f831 --- /dev/null +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -0,0 +1,177 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./lib.sh + +PAUSE_ON_FAIL="no" + +# The trap function handler +# +exit_cleanup_all() +{ + cleanup_all_ns + + exit "${EXIT_STATUS}" +} + +# Add fake IPv4 and IPv6 networks on the loopback device, to be used as +# underlay by future GRE devices. +# +setup_basenet() +{ + ip -netns "${NS0}" link set dev lo up + ip -netns "${NS0}" address add dev lo 192.0.2.10/24 + ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad +} + +# Check if network device has an IPv6 link-local address assigned. +# +# Parameters: +# +# * $1: The network device to test +# * $2: An extra regular expression that should be matched (to verify the +# presence of extra attributes) +# * $3: The expected return code from grep (to allow checking the absence of +# a link-local address) +# * $4: The user visible name for the scenario being tested +# +check_ipv6_ll_addr() +{ + local DEV="$1" + local EXTRA_MATCH="$2" + local XRET="$3" + local MSG="$4" + + RET=0 + set +e + ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" + check_err_fail "${XRET}" $? "" + log_test "${MSG}" + set -e +} + +# Create a GRE device and verify that it gets an IPv6 link-local address as +# expected. +# +# Parameters: +# +# * $1: The device type (gre, ip6gre, gretap or ip6gretap) +# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any") +# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any") +# * $4: The IPv6 interface identifier generation mode to use for the GRE +# device (eui64, none, stable-privacy or random). +# +test_gre_device() +{ + local GRE_TYPE="$1" + local LOCAL_IP="$2" + local REMOTE_IP="$3" + local MODE="$4" + local ADDR_GEN_MODE + local MATCH_REGEXP + local MSG + + ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}" + + case "${MODE}" in + "eui64") + ADDR_GEN_MODE=0 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + "none") + ADDR_GEN_MODE=1 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=1 # No link-local address should be generated + ;; + "stable-privacy") + ADDR_GEN_MODE=2 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + # Initialise stable_secret (required for stable-privacy mode) + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd" + ;; + "random") + ADDR_GEN_MODE=3 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + esac + + # Check that IPv6 link-local address is generated when device goes up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + ip -netns "${NS0}" link set dev gretest up + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + + # Now disable link-local address generation + ip -netns "${NS0}" link set dev gretest down + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 + ip -netns "${NS0}" link set dev gretest up + + # Check that link-local address generation works when re-enabled while + # the device is already up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + + ip -netns "${NS0}" link del dev gretest +} + +test_gre4() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "gre" "gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}" + done + done +} + +test_gre6() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "ip6gre" "ip6gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}" + done + done +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +setup_ns NS0 + +set -e +trap exit_cleanup_all EXIT + +setup_basenet + +test_gre4 +test_gre6 From b344a48cbe5fb24697addc6afbb7358b938a7d31 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Fri, 2 May 2025 18:35:16 -0700 Subject: [PATCH 406/559] selftests: drv: net: fix test failure on ipv6 sys The `get_interface_info` call has ip version hard-coded which leads to failures on an IPV6 system. The NetDrvEnv class already gathers information about remote interface, so instead of fixing the local implementation switch to using cfg.remote_ifname. Before: ./drivers/net/ping.py Traceback (most recent call last): File "/new_tests/./drivers/net/ping.py", line 217, in main() File "/new_tests/./drivers/net/ping.py", line 204, in main get_interface_info(cfg) File "/new_tests/./drivers/net/ping.py", line 128, in get_interface_info raise KsftFailEx('Can not get remote interface') net.lib.py.ksft.KsftFailEx: Can not get remote interface After: ./drivers/net/ping.py TAP version 13 1..6 ok 1 ping.test_default # SKIP Test requires IPv4 connectivity ok 2 ping.test_xdp_generic_sb # SKIP Test requires IPv4 connectivity ok 3 ping.test_xdp_generic_mb # SKIP Test requires IPv4 connectivity ok 4 ping.test_xdp_native_sb # SKIP Test requires IPv4 connectivity ok 5 ping.test_xdp_native_mb # SKIP Test requires IPv4 connectivity ok 6 ping.test_xdp_offload # SKIP device does not support offloaded XDP Totals: pass:0 fail:0 xfail:0 xpass:0 skip:6 error:0 Fixes: 75cc19c8ff89 ("selftests: drv-net: add xdp cases for ping.py") Signed-off-by: Mohsin Bashir Reviewed-by: David Wei Link: https://patch.msgid.link/20250503013518.1722913-2-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ping.py | 22 +++++++++------------ 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 4b6822866066..5272e8b3536d 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -9,7 +9,6 @@ from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen, rand_port from lib.py import defer, ethtool, ip -remote_ifname="" no_sleep=False def _test_v4(cfg) -> None: @@ -57,7 +56,7 @@ def _set_offload_checksum(cfg, netnl, on) -> None: def _set_xdp_generic_sb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off") @@ -66,8 +65,8 @@ def _set_xdp_generic_sb_on(cfg) -> None: def _set_xdp_generic_mb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) - defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off") @@ -76,7 +75,7 @@ def _set_xdp_generic_mb_on(cfg) -> None: def _set_xdp_native_sb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] @@ -93,8 +92,8 @@ def _set_xdp_native_sb_on(cfg) -> None: def _set_xdp_native_mb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) - defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) try: cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") @@ -112,18 +111,15 @@ def _set_xdp_offload_on(cfg) -> None: except Exception as e: raise KsftSkipEx('device does not support offloaded XDP') defer(ip, f"link set dev {cfg.ifname} xdpoffload off") - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) if no_sleep != True: time.sleep(10) def get_interface_info(cfg) -> None: - global remote_ifname global no_sleep - remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_addr_v['4']} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout - remote_ifname = remote_info.rstrip('\n') - if remote_ifname == "": + if cfg.remote_ifname == "": raise KsftFailEx('Can not get remote interface') local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim": @@ -136,7 +132,7 @@ def set_interface_init(cfg) -> None: cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True) cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True) cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) def test_default(cfg, netnl) -> None: _set_offload_checksum(cfg, netnl, "off") From 8bb7d8e5cf7f44ea89a445975cbd78888324f234 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Fri, 2 May 2025 18:35:17 -0700 Subject: [PATCH 407/559] selftests: drv: net: avoid skipping tests On a system with either of the ipv4 or ipv6 information missing, tests are currently skipped. Ideally, the test should run as long as at least one address family is present. This patch make test run whenever possible. Before: ./drivers/net/ping.py TAP version 13 1..6 ok 1 ping.test_default # SKIP Test requires IPv4 connectivity ok 2 ping.test_xdp_generic_sb # SKIP Test requires IPv4 connectivity ok 3 ping.test_xdp_generic_mb # SKIP Test requires IPv4 connectivity ok 4 ping.test_xdp_native_sb # SKIP Test requires IPv4 connectivity ok 5 ping.test_xdp_native_mb # SKIP Test requires IPv4 connectivity ok 6 ping.test_xdp_offload # SKIP device does not support offloaded XDP Totals: pass:0 fail:0 xfail:0 xpass:0 skip:6 error:0 After: ./drivers/net/ping.py TAP version 13 1..6 ok 1 ping.test_default ok 2 ping.test_xdp_generic_sb ok 3 ping.test_xdp_generic_mb ok 4 ping.test_xdp_native_sb ok 5 ping.test_xdp_native_mb ok 6 ping.test_xdp_offload # SKIP device does not support offloaded XDP Totals: pass:5 fail:0 xfail:0 xpass:0 skip:1 error:0 Fixes: 75cc19c8ff89 ("selftests: drv-net: add xdp cases for ping.py") Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20250503013518.1722913-3-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ping.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 5272e8b3536d..16b7d3ab0fc8 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -12,7 +12,8 @@ from lib.py import defer, ethtool, ip no_sleep=False def _test_v4(cfg) -> None: - cfg.require_ipver("4") + if not cfg.addr_v["4"]: + return cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"]) cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) @@ -20,7 +21,8 @@ def _test_v4(cfg) -> None: cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) def _test_v6(cfg) -> None: - cfg.require_ipver("6") + if not cfg.addr_v["6"]: + return cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"]) cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote) From 4a9d494ca24b7fd509b3953fcb43d580cb05097b Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Fri, 2 May 2025 18:35:18 -0700 Subject: [PATCH 408/559] selftests: drv: net: add version indicator Currently, the test result does not differentiate between the cases when either one of the address families are configured or if both the address families are configured. Ideally, the result should report if a particular case was skipped. ./drivers/net/ping.py TAP version 13 1..7 ok 1 ping.test_default_v4 # SKIP Test requires IPv4 connectivity ok 2 ping.test_default_v6 ok 3 ping.test_xdp_generic_sb ok 4 ping.test_xdp_generic_mb ok 5 ping.test_xdp_native_sb ok 6 ping.test_xdp_native_mb ok 7 ping.test_xdp_offload # SKIP device does not support offloaded XDP Totals: pass:5 fail:0 xfail:0 xpass:0 skip:2 error:0 Fixes: 75cc19c8ff89 ("selftests: drv-net: add xdp cases for ping.py") Signed-off-by: Mohsin Bashir Reviewed-by: David Wei Link: https://patch.msgid.link/20250503013518.1722913-4-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ping.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 16b7d3ab0fc8..af8df2313a3b 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -136,13 +136,23 @@ def set_interface_init(cfg) -> None: cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) -def test_default(cfg, netnl) -> None: +def test_default_v4(cfg, netnl) -> None: + cfg.require_ipver("4") + _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) - _test_v6(cfg) _test_tcp(cfg) _set_offload_checksum(cfg, netnl, "on") _test_v4(cfg) + _test_tcp(cfg) + +def test_default_v6(cfg, netnl) -> None: + cfg.require_ipver("6") + + _set_offload_checksum(cfg, netnl, "off") + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") _test_v6(cfg) _test_tcp(cfg) @@ -200,7 +210,8 @@ def main() -> None: with NetDrvEpEnv(__file__) as cfg: get_interface_info(cfg) set_interface_init(cfg) - ksft_run([test_default, + ksft_run([test_default_v4, + test_default_v6, test_xdp_generic_sb, test_xdp_generic_mb, test_xdp_native_sb, From 4720f9707c783f642332dee3d56dccaefa850e42 Mon Sep 17 00:00:00 2001 From: David Wei Date: Fri, 2 May 2025 21:30:50 -0700 Subject: [PATCH 409/559] tools: ynl-gen: validate 0 len strings from kernel Strings from the kernel are guaranteed to be null terminated and ynl_attr_validate() checks for this. But it doesn't check if the string has a len of 0, which would cause problems when trying to access data[len - 1]. Fix this by checking that len is positive. Signed-off-by: David Wei Link: https://patch.msgid.link/20250503043050.861238-1-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index ce32cb35007d..c4da34048ef8 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -364,7 +364,7 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr) "Invalid attribute (binary %s)", policy->name); return -1; case YNL_PT_NUL_STR: - if ((!policy->len || len <= policy->len) && !data[len - 1]) + if (len && (!policy->len || len <= policy->len) && !data[len - 1]) break; yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, "Invalid attribute (string %s)", policy->name); From 2bb04ea9e5b7a2ef583c042ed5f8111804606e9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 2 May 2025 15:01:01 +0200 Subject: [PATCH 410/559] drm/ttm: Fix ttm_backup kerneldoc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docs were not properly updated from an earlier version of the code. Fixes: e7b5d23e5d47 ("drm/ttm: Provide a shmem backup implementation") Cc: Christian König Cc: Matthew Brost Cc: Matthew Auld Cc: dri-devel@lists.freedesktop.org Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250502130101.3185-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_backup.c | 2 +- include/drm/ttm/ttm_backup.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_backup.c b/drivers/gpu/drm/ttm/ttm_backup.c index 93c007f18855..f58e7393888f 100644 --- a/drivers/gpu/drm/ttm/ttm_backup.c +++ b/drivers/gpu/drm/ttm/ttm_backup.c @@ -55,7 +55,7 @@ void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle) * @backup: The struct backup pointer used to back up the page. * @dst: The struct page to copy into. * @handle: The handle returned when the page was backed up. - * @intr: Try to perform waits interruptable or at least killable. + * @intr: Try to perform waits interruptible or at least killable. * * Return: 0 on success, Negative error code on failure, notably * -EINTR if @intr was set to true and a signal is pending. diff --git a/include/drm/ttm/ttm_backup.h b/include/drm/ttm/ttm_backup.h index 24ad120b8827..574b932177cc 100644 --- a/include/drm/ttm/ttm_backup.h +++ b/include/drm/ttm/ttm_backup.h @@ -16,7 +16,7 @@ struct ttm_backup; * @handle: The handle to convert. * * Converts an opaque handle received from the - * struct ttm_backoup_ops::backup_page() function to an (invalid) + * ttm_backup_backup_page() function to an (invalid) * struct page pointer suitable for a struct page array. * * Return: An (invalid) struct page pointer. @@ -45,8 +45,8 @@ static inline bool ttm_backup_page_ptr_is_handle(const struct page *page) * * Return: The handle that was previously used in * ttm_backup_handle_to_page_ptr() to obtain a struct page pointer, suitable - * for use as argument in the struct ttm_backup_ops drop() or - * copy_backed_up_page() functions. + * for use as argument in the struct ttm_backup_drop() or + * ttm_backup_copy_page() functions. */ static inline unsigned long ttm_backup_page_ptr_to_handle(const struct page *page) From d4ad53adfe21df1464bae5ed916085a69d6ffb3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 2 May 2025 15:00:14 +0200 Subject: [PATCH 411/559] drm/ttm: Remove the struct ttm_backup abstraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The abstraction was previously added to support separate ttm_backup implementations. However with the current implementation casting from a struct file to a struct ttm_backup, we run into trouble since struct file may have randomized the layout and gcc complains. Remove the struct ttm_backup abstraction Cc: dri-devel@lists.freedesktop.org Cc: Matthew Brost Cc: Dave Airlie Cc: Christian König Cc: Matthew Auld Cc: Al Viro Reported-by: Kees Cook Closes: https://lore.kernel.org/dri-devel/9c8dbbafdaf9f3f089da2cde5a772d69579b3795.camel@linux.intel.com/T/#mb153ab9216cb813b92bdeb36f391ad4808c2ba29 Suggested-by: Christian König Fixes: 70d645deac98 ("drm/ttm: Add helpers for shrinking") Signed-off-by: Thomas Hellström Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250502130014.3156-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_backup.c | 42 +++++++++----------------------- drivers/gpu/drm/ttm/ttm_pool.c | 6 ++--- drivers/gpu/drm/ttm/ttm_tt.c | 2 +- include/drm/ttm/ttm_backup.h | 12 ++++----- include/drm/ttm/ttm_tt.h | 2 +- 5 files changed, 21 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_backup.c b/drivers/gpu/drm/ttm/ttm_backup.c index f58e7393888f..9e2d72c447ee 100644 --- a/drivers/gpu/drm/ttm/ttm_backup.c +++ b/drivers/gpu/drm/ttm/ttm_backup.c @@ -7,20 +7,6 @@ #include #include -/* - * Casting from randomized struct file * to struct ttm_backup * is fine since - * struct ttm_backup is never defined nor dereferenced. - */ -static struct file *ttm_backup_to_file(struct ttm_backup *backup) -{ - return (void *)backup; -} - -static struct ttm_backup *ttm_file_to_backup(struct file *file) -{ - return (void *)file; -} - /* * Need to map shmem indices to handle since a handle value * of 0 means error, following the swp_entry_t convention. @@ -40,12 +26,12 @@ static pgoff_t ttm_backup_handle_to_shmem_idx(pgoff_t handle) * @backup: The struct backup pointer used to obtain the handle * @handle: The handle obtained from the @backup_page function. */ -void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle) +void ttm_backup_drop(struct file *backup, pgoff_t handle) { loff_t start = ttm_backup_handle_to_shmem_idx(handle); start <<= PAGE_SHIFT; - shmem_truncate_range(file_inode(ttm_backup_to_file(backup)), start, + shmem_truncate_range(file_inode(backup), start, start + PAGE_SIZE - 1); } @@ -60,11 +46,10 @@ void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle) * Return: 0 on success, Negative error code on failure, notably * -EINTR if @intr was set to true and a signal is pending. */ -int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, +int ttm_backup_copy_page(struct file *backup, struct page *dst, pgoff_t handle, bool intr) { - struct file *filp = ttm_backup_to_file(backup); - struct address_space *mapping = filp->f_mapping; + struct address_space *mapping = backup->f_mapping; struct folio *from_folio; pgoff_t idx = ttm_backup_handle_to_shmem_idx(handle); @@ -106,12 +91,11 @@ int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, * the folio size- and usage. */ s64 -ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, +ttm_backup_backup_page(struct file *backup, struct page *page, bool writeback, pgoff_t idx, gfp_t page_gfp, gfp_t alloc_gfp) { - struct file *filp = ttm_backup_to_file(backup); - struct address_space *mapping = filp->f_mapping; + struct address_space *mapping = backup->f_mapping; unsigned long handle = 0; struct folio *to_folio; int ret; @@ -161,9 +145,9 @@ ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, * * After a call to this function, it's illegal to use the @backup pointer. */ -void ttm_backup_fini(struct ttm_backup *backup) +void ttm_backup_fini(struct file *backup) { - fput(ttm_backup_to_file(backup)); + fput(backup); } /** @@ -194,14 +178,10 @@ EXPORT_SYMBOL_GPL(ttm_backup_bytes_avail); * * Create a backup utilizing shmem objects. * - * Return: A pointer to a struct ttm_backup on success, + * Return: A pointer to a struct file on success, * an error pointer on error. */ -struct ttm_backup *ttm_backup_shmem_create(loff_t size) +struct file *ttm_backup_shmem_create(loff_t size) { - struct file *filp; - - filp = shmem_file_setup("ttm shmem backup", size, 0); - - return ttm_file_to_backup(filp); + return shmem_file_setup("ttm shmem backup", size, 0); } diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 83b10706ba89..c2ea865be657 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -506,7 +506,7 @@ static void ttm_pool_allocated_page_commit(struct page *allocated, * if successful, populate the page-table and dma-address arrays. */ static int ttm_pool_restore_commit(struct ttm_pool_tt_restore *restore, - struct ttm_backup *backup, + struct file *backup, const struct ttm_operation_ctx *ctx, struct ttm_pool_alloc_state *alloc) @@ -655,7 +655,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pgoff_t start_page, pgoff_t end_page) { struct page **pages = &tt->pages[start_page]; - struct ttm_backup *backup = tt->backup; + struct file *backup = tt->backup; pgoff_t i, nr; for (i = start_page; i < end_page; i += nr, pages += nr) { @@ -963,7 +963,7 @@ void ttm_pool_drop_backed_up(struct ttm_tt *tt) long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, const struct ttm_backup_flags *flags) { - struct ttm_backup *backup = tt->backup; + struct file *backup = tt->backup; struct page *page; unsigned long handle; gfp_t alloc_gfp; diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index df0aa6c4b8b8..698cd4bf5e46 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -544,7 +544,7 @@ EXPORT_SYMBOL(ttm_tt_pages_limit); */ int ttm_tt_setup_backup(struct ttm_tt *tt) { - struct ttm_backup *backup = + struct file *backup = ttm_backup_shmem_create(((loff_t)tt->num_pages) << PAGE_SHIFT); if (WARN_ON_ONCE(!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))) diff --git a/include/drm/ttm/ttm_backup.h b/include/drm/ttm/ttm_backup.h index 574b932177cc..c33cba111171 100644 --- a/include/drm/ttm/ttm_backup.h +++ b/include/drm/ttm/ttm_backup.h @@ -9,8 +9,6 @@ #include #include -struct ttm_backup; - /** * ttm_backup_handle_to_page_ptr() - Convert handle to struct page pointer * @handle: The handle to convert. @@ -55,20 +53,20 @@ ttm_backup_page_ptr_to_handle(const struct page *page) return (unsigned long)page >> 1; } -void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle); +void ttm_backup_drop(struct file *backup, pgoff_t handle); -int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, +int ttm_backup_copy_page(struct file *backup, struct page *dst, pgoff_t handle, bool intr); s64 -ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, +ttm_backup_backup_page(struct file *backup, struct page *page, bool writeback, pgoff_t idx, gfp_t page_gfp, gfp_t alloc_gfp); -void ttm_backup_fini(struct ttm_backup *backup); +void ttm_backup_fini(struct file *backup); u64 ttm_backup_bytes_avail(void); -struct ttm_backup *ttm_backup_shmem_create(loff_t size); +struct file *ttm_backup_shmem_create(loff_t size); #endif diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 13cf47f3322f..406437ad674b 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -118,7 +118,7 @@ struct ttm_tt { * ttm_tt_create() callback is responsible for assigning * this field. */ - struct ttm_backup *backup; + struct file *backup; /** * @caching: The current caching state of the pages, see enum * ttm_caching. From 363cd2b81cfdf706bbfc9ec78db000c9b1ecc552 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Fri, 2 May 2025 19:04:12 +0100 Subject: [PATCH 412/559] arm64: cpufeature: Move arm64_use_ng_mappings to the .data section to prevent wrong idmap generation The PTE_MAYBE_NG macro sets the nG page table bit according to the value of "arm64_use_ng_mappings". This variable is currently placed in the .bss section. create_init_idmap() is called before the .bss section initialisation which is done in early_map_kernel(). Therefore, data/test_prot in create_init_idmap() could be set incorrectly through the PAGE_KERNEL -> PROT_DEFAULT -> PTE_MAYBE_NG macros. # llvm-objdump-21 --syms vmlinux-gcc | grep arm64_use_ng_mappings ffff800082f242a8 g O .bss 0000000000000001 arm64_use_ng_mappings The create_init_idmap() function disassembly compiled with llvm-21: // create_init_idmap() ffff80008255c058: d10103ff sub sp, sp, #0x40 ffff80008255c05c: a9017bfd stp x29, x30, [sp, #0x10] ffff80008255c060: a90257f6 stp x22, x21, [sp, #0x20] ffff80008255c064: a9034ff4 stp x20, x19, [sp, #0x30] ffff80008255c068: 910043fd add x29, sp, #0x10 ffff80008255c06c: 90003fc8 adrp x8, 0xffff800082d54000 ffff80008255c070: d280e06a mov x10, #0x703 // =1795 ffff80008255c074: 91400409 add x9, x0, #0x1, lsl #12 // =0x1000 ffff80008255c078: 394a4108 ldrb w8, [x8, #0x290] ------------- (1) ffff80008255c07c: f2e00d0a movk x10, #0x68, lsl #48 ffff80008255c080: f90007e9 str x9, [sp, #0x8] ffff80008255c084: aa0103f3 mov x19, x1 ffff80008255c088: aa0003f4 mov x20, x0 ffff80008255c08c: 14000000 b 0xffff80008255c08c <__pi_create_init_idmap+0x34> ffff80008255c090: aa082d56 orr x22, x10, x8, lsl #11 -------- (2) Note (1) is loading the arm64_use_ng_mappings value in w8 and (2) is set the text or data prot with the w8 value to set PTE_NG bit. If the .bss section isn't initialized, x8 could include a garbage value and generate an incorrect mapping. Annotate arm64_use_ng_mappings as __read_mostly so that it is placed in the .data section. Fixes: 84b04d3e6bdb ("arm64: kernel: Create initial ID map from C code") Cc: stable@vger.kernel.org # 6.9.x Tested-by: Nathan Chancellor Signed-off-by: Yeoreum Yun Link: https://lore.kernel.org/r/20250502180412.3774883-1-yeoreum.yun@arm.com [catalin.marinas@arm.com: use __read_mostly instead of __ro_after_init] [catalin.marinas@arm.com: slight tweaking of the code comment] Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9c4d6d552b25..4c46d80aa64b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -114,7 +114,14 @@ static struct arm64_cpu_capabilities const __ro_after_init *cpucap_ptrs[ARM64_NC DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS); -bool arm64_use_ng_mappings = false; +/* + * arm64_use_ng_mappings must be placed in the .data section, otherwise it + * ends up in the .bss section where it is initialized in early_map_kernel() + * after the MMU (with the idmap) was enabled. create_init_idmap() - which + * runs before early_map_kernel() and reads the variable via PTE_MAYBE_NG - + * may end up generating an incorrect idmap page table attributes. + */ +bool arm64_use_ng_mappings __read_mostly = false; EXPORT_SYMBOL(arm64_use_ng_mappings); DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors; From 4db6c75124d871fbabf8243f947d34cc7e0697fc Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 5 May 2025 02:07:32 +0100 Subject: [PATCH 413/559] net: ethernet: mtk_eth_soc: reset all TX queues on DMA free The purpose of resetting the TX queue is to reset the byte and packet count as well as to clear the software flow control XOFF bit. MediaTek developers pointed out that netdev_reset_queue would only resets queue 0 of the network device. Queues that are not reset may cause unexpected issues. Packets may stop being sent after reset and "transmit timeout" log may be displayed. Import fix from MediaTek's SDK to resolve this issue. Link: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/319c0d9905579a46dc448579f892f364f1f84818 Fixes: f63959c7eec31 ("net: ethernet: mtk_eth_soc: implement multi-queue support for per-port queues") Signed-off-by: Daniel Golle Link: https://patch.msgid.link/c9ff9adceac4f152239a0f65c397f13547639175.1746406763.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 8fda4ce80d81..53c39561b6d9 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3186,11 +3186,19 @@ static int mtk_dma_init(struct mtk_eth *eth) static void mtk_dma_free(struct mtk_eth *eth) { const struct mtk_soc_data *soc = eth->soc; - int i; + int i, j, txqs = 1; + + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) + txqs = MTK_QDMA_NUM_QUEUES; + + for (i = 0; i < MTK_MAX_DEVS; i++) { + if (!eth->netdev[i]) + continue; + + for (j = 0; j < txqs; j++) + netdev_tx_reset_subqueue(eth->netdev[i], j); + } - for (i = 0; i < MTK_MAX_DEVS; i++) - if (eth->netdev[i]) - netdev_reset_queue(eth->netdev[i]); if (!MTK_HAS_CAPS(soc->caps, MTK_SRAM) && eth->scratch_ring) { dma_free_coherent(eth->dma_dev, MTK_QDMA_RING_SIZE * soc->tx.desc_size, From e8716b5b0dff1b3d523b4a83fd5e94d57b887c5c Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Mon, 5 May 2025 02:07:58 +0100 Subject: [PATCH 414/559] net: ethernet: mtk_eth_soc: do not reset PSE when setting FE Remove redundant PSE reset. When setting FE register there is no need to reset PSE, doing so may cause FE to work abnormal. Link: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/3a5223473e086a4b54a2b9a44df7d9ddcc2bc75a Fixes: dee4dd10c79aa ("net: ethernet: mtk_eth_soc: ppe: add support for multiple PPEs") Signed-off-by: Frank Wunderlich Link: https://patch.msgid.link/18f0ac7d83f82defa3342c11ef0d1362f6b81e88.1746406763.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 53c39561b6d9..22a532695fb0 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3473,9 +3473,6 @@ static int mtk_open(struct net_device *dev) } mtk_gdm_config(eth, target_mac->id, gdm_config); } - /* Reset and enable PSE */ - mtk_w32(eth, RST_GL_PSE, MTK_RST_GL); - mtk_w32(eth, 0, MTK_RST_GL); napi_enable(ð->tx_napi); napi_enable(ð->rx_napi); From 7c6fa1797a725732981f2d77711c867166737719 Mon Sep 17 00:00:00 2001 From: Kevin Baker Date: Mon, 5 May 2025 12:02:56 -0500 Subject: [PATCH 415/559] drm/panel: simple: Update timings for AUO G101EVN010 Switch to panel timings based on datasheet for the AUO G101EVN01.0 LVDS panel. Default timings were tested on the panel. Previous mode-based timings resulted in horizontal display shift. Signed-off-by: Kevin Baker Fixes: 4fb86404a977 ("drm/panel: simple: Add AUO G101EVN010 panel support") Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250505170256.1385113-1-kevinb@ventureresearch.com Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250505170256.1385113-1-kevinb@ventureresearch.com --- drivers/gpu/drm/panel/panel-simple.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 232b03c1a259..33a37539de57 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -1027,27 +1027,28 @@ static const struct panel_desc auo_g070vvn01 = { }, }; -static const struct drm_display_mode auo_g101evn010_mode = { - .clock = 68930, - .hdisplay = 1280, - .hsync_start = 1280 + 82, - .hsync_end = 1280 + 82 + 2, - .htotal = 1280 + 82 + 2 + 84, - .vdisplay = 800, - .vsync_start = 800 + 8, - .vsync_end = 800 + 8 + 2, - .vtotal = 800 + 8 + 2 + 6, +static const struct display_timing auo_g101evn010_timing = { + .pixelclock = { 64000000, 68930000, 85000000 }, + .hactive = { 1280, 1280, 1280 }, + .hfront_porch = { 8, 64, 256 }, + .hback_porch = { 8, 64, 256 }, + .hsync_len = { 40, 168, 767 }, + .vactive = { 800, 800, 800 }, + .vfront_porch = { 4, 8, 100 }, + .vback_porch = { 4, 8, 100 }, + .vsync_len = { 8, 16, 223 }, }; static const struct panel_desc auo_g101evn010 = { - .modes = &auo_g101evn010_mode, - .num_modes = 1, + .timings = &auo_g101evn010_timing, + .num_timings = 1, .bpc = 6, .size = { .width = 216, .height = 135, }, .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, .connector_type = DRM_MODE_CONNECTOR_LVDS, }; From f1aff4bc199cb92c055668caed65505e3b4d2656 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 6 May 2025 11:31:50 +0000 Subject: [PATCH 416/559] dm: fix copying after src array boundaries The blammed commit copied to argv the size of the reallocated argv, instead of the size of the old_argv, thus reading and copying from past the old_argv allocated memory. Following BUG_ON was hit: [ 3.038929][ T1] kernel BUG at lib/string_helpers.c:1040! [ 3.039147][ T1] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP ... [ 3.056489][ T1] Call trace: [ 3.056591][ T1] __fortify_panic+0x10/0x18 (P) [ 3.056773][ T1] dm_split_args+0x20c/0x210 [ 3.056942][ T1] dm_table_add_target+0x13c/0x360 [ 3.057132][ T1] table_load+0x110/0x3ac [ 3.057292][ T1] dm_ctl_ioctl+0x424/0x56c [ 3.057457][ T1] __arm64_sys_ioctl+0xa8/0xec [ 3.057634][ T1] invoke_syscall+0x58/0x10c [ 3.057804][ T1] el0_svc_common+0xa8/0xdc [ 3.057970][ T1] do_el0_svc+0x1c/0x28 [ 3.058123][ T1] el0_svc+0x50/0xac [ 3.058266][ T1] el0t_64_sync_handler+0x60/0xc4 [ 3.058452][ T1] el0t_64_sync+0x1b0/0x1b4 [ 3.058620][ T1] Code: f800865e a9bf7bfd 910003fd 941f48aa (d4210000) [ 3.058897][ T1] ---[ end trace 0000000000000000 ]--- [ 3.059083][ T1] Kernel panic - not syncing: Oops - BUG: Fatal exception Fix it by copying the size of src, and not the size of dst, as it was. Fixes: 5a2a6c428190 ("dm: always update the array size in realloc_argv on success") Cc: stable@vger.kernel.org Signed-off-by: Tudor Ambarus Signed-off-by: Mikulas Patocka --- drivers/md/dm-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 31d67a1a91dd..6b23e777e10e 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -524,9 +524,9 @@ static char **realloc_argv(unsigned int *size, char **old_argv) } argv = kmalloc_array(new_size, sizeof(*argv), gfp); if (argv) { - *size = new_size; if (old_argv) memcpy(argv, old_argv, *size * sizeof(*argv)); + *size = new_size; } kfree(old_argv); From 0ca6df4f40cf4c32487944aaf48319cb6c25accc Mon Sep 17 00:00:00 2001 From: Norbert Szetei Date: Fri, 2 May 2025 08:21:58 +0900 Subject: [PATCH 417/559] ksmbd: prevent out-of-bounds stream writes by validating *pos ksmbd_vfs_stream_write() did not validate whether the write offset (*pos) was within the bounds of the existing stream data length (v_len). If *pos was greater than or equal to v_len, this could lead to an out-of-bounds memory write. This patch adds a check to ensure *pos is less than v_len before proceeding. If the condition fails, -EINVAL is returned. Cc: stable@vger.kernel.org Signed-off-by: Norbert Szetei Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 391d07da586c..482eba0f4dc1 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -426,6 +426,13 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, goto out; } + if (v_len <= *pos) { + pr_err("stream write position %lld is out of bounds (stream length: %zd)\n", + *pos, v_len); + err = -EINVAL; + goto out; + } + if (v_len < size) { wbuf = kvzalloc(size, KSMBD_DEFAULT_GFP); if (!wbuf) { From 36991c1ccde2d5a521577c448ffe07fcccfe104d Mon Sep 17 00:00:00 2001 From: Sean Heelan Date: Tue, 6 May 2025 22:04:52 +0900 Subject: [PATCH 418/559] ksmbd: Fix UAF in __close_file_table_ids A use-after-free is possible if one thread destroys the file via __ksmbd_close_fd while another thread holds a reference to it. The existing checks on fp->refcount are not sufficient to prevent this. The fix takes ft->lock around the section which removes the file from the file table. This prevents two threads acquiring the same file pointer via __close_file_table_ids, as well as the other functions which retrieve a file from the IDR and which already use this same lock. Cc: stable@vger.kernel.org Signed-off-by: Sean Heelan Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs_cache.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 1f8fa3468173..dfed6fce8904 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -661,21 +661,40 @@ __close_file_table_ids(struct ksmbd_file_table *ft, bool (*skip)(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp)) { - unsigned int id; - struct ksmbd_file *fp; - int num = 0; + struct ksmbd_file *fp; + unsigned int id = 0; + int num = 0; - idr_for_each_entry(ft->idr, fp, id) { - if (skip(tcon, fp)) + while (1) { + write_lock(&ft->lock); + fp = idr_get_next(ft->idr, &id); + if (!fp) { + write_unlock(&ft->lock); + break; + } + + if (skip(tcon, fp) || + !atomic_dec_and_test(&fp->refcount)) { + id++; + write_unlock(&ft->lock); continue; + } set_close_state_blocked_works(fp); + idr_remove(ft->idr, fp->volatile_id); + fp->volatile_id = KSMBD_NO_FID; + write_unlock(&ft->lock); + + down_write(&fp->f_ci->m_lock); + list_del_init(&fp->node); + up_write(&fp->f_ci->m_lock); - if (!atomic_dec_and_test(&fp->refcount)) - continue; __ksmbd_close_fd(ft, fp); + num++; + id++; } + return num; } From dcaeeb8ae84c5506ebc574732838264f3887738c Mon Sep 17 00:00:00 2001 From: Antonios Salios Date: Fri, 25 Apr 2025 13:17:45 +0200 Subject: [PATCH 419/559] can: m_can: m_can_class_allocate_dev(): initialize spin lock on device probe The spin lock tx_handling_spinlock in struct m_can_classdev is not being initialized. This leads the following spinlock bad magic complaint from the kernel, eg. when trying to send CAN frames with cansend from can-utils: | BUG: spinlock bad magic on CPU#0, cansend/95 | lock: 0xff60000002ec1010, .magic: 00000000, .owner: /-1, .owner_cpu: 0 | CPU: 0 UID: 0 PID: 95 Comm: cansend Not tainted 6.15.0-rc3-00032-ga79be02bba5c #5 NONE | Hardware name: MachineWare SIM-V (DT) | Call Trace: | [] dump_backtrace+0x1c/0x24 | [] show_stack+0x28/0x34 | [] dump_stack_lvl+0x4a/0x68 | [] dump_stack+0x14/0x1c | [] spin_dump+0x62/0x6e | [] do_raw_spin_lock+0xd0/0x142 | [] _raw_spin_lock_irqsave+0x20/0x2c | [] m_can_start_xmit+0x90/0x34a | [] dev_hard_start_xmit+0xa6/0xee | [] sch_direct_xmit+0x114/0x292 | [] __dev_queue_xmit+0x3b0/0xaa8 | [] can_send+0xc6/0x242 | [] raw_sendmsg+0x1a8/0x36c | [] sock_write_iter+0x9a/0xee | [] vfs_write+0x184/0x3a6 | [] ksys_write+0xa0/0xc0 | [] __riscv_sys_write+0x14/0x1c | [] do_trap_ecall_u+0x168/0x212 | [] handle_exception+0x146/0x152 Initializing the spin lock in m_can_class_allocate_dev solves that problem. Fixes: 1fa80e23c150 ("can: m_can: Introduce a tx_fifo_in_flight counter") Signed-off-by: Antonios Salios Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20250425111744.37604-2-antonios@mwa.re Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 884a6352c42b..326ede9d400f 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2379,6 +2379,7 @@ struct m_can_classdev *m_can_class_allocate_dev(struct device *dev, SET_NETDEV_DEV(net_dev, dev); m_can_of_parse_mram(class_dev, mram_config_vals); + spin_lock_init(&class_dev->tx_handling_spinlock); out: return class_dev; } From 5e1663810e11c64956aa7e280cf74b2f3284d816 Mon Sep 17 00:00:00 2001 From: Kelsey Maes Date: Wed, 30 Apr 2025 09:15:01 -0700 Subject: [PATCH 420/559] can: mcp251xfd: fix TDC setting for low data bit rates The TDC is currently hardcoded enabled. This means that even for lower CAN-FD data bitrates (with a DBRP (data bitrate prescaler) > 2) a TDC is configured. This leads to a bus-off condition. ISO 11898-1 section 11.3.3 says "Transmitter delay compensation" (TDC) is only applicable if DBRP is 1 or 2. To fix the problem, switch the driver to use the TDC calculation provided by the CAN driver framework (which respects ISO 11898-1 section 11.3.3). This has the positive side effect that userspace can control TDC as needed. Demonstration of the feature in action: | $ ip link set can0 up type can bitrate 125000 dbitrate 500000 fd on | $ ip -details link show can0 | 3: can0: mtu 72 qdisc pfifo_fast state UP mode DEFAULT group default qlen 10 | link/can promiscuity 0 allmulti 0 minmtu 0 maxmtu 0 | can state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0 | bitrate 125000 sample-point 0.875 | tq 50 prop-seg 69 phase-seg1 70 phase-seg2 20 sjw 10 brp 2 | mcp251xfd: tseg1 2..256 tseg2 1..128 sjw 1..128 brp 1..256 brp_inc 1 | dbitrate 500000 dsample-point 0.875 | dtq 125 dprop-seg 6 dphase-seg1 7 dphase-seg2 2 dsjw 1 dbrp 5 | mcp251xfd: dtseg1 1..32 dtseg2 1..16 dsjw 1..16 dbrp 1..256 dbrp_inc 1 | tdcv 0..63 tdco 0..63 | clock 40000000 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 tso_max_size 65536 tso_max_segs 65535 gro_max_size 65536 parentbus spi parentdev spi0.0 | $ ip link set can0 up type can bitrate 1000000 dbitrate 4000000 fd on | $ ip -details link show can0 | 3: can0: mtu 72 qdisc pfifo_fast state UP mode DEFAULT group default qlen 10 | link/can promiscuity 0 allmulti 0 minmtu 0 maxmtu 0 | can state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0 | bitrate 1000000 sample-point 0.750 | tq 25 prop-seg 14 phase-seg1 15 phase-seg2 10 sjw 5 brp 1 | mcp251xfd: tseg1 2..256 tseg2 1..128 sjw 1..128 brp 1..256 brp_inc 1 | dbitrate 4000000 dsample-point 0.700 | dtq 25 dprop-seg 3 dphase-seg1 3 dphase-seg2 3 dsjw 1 dbrp 1 | tdco 7 | mcp251xfd: dtseg1 1..32 dtseg2 1..16 dsjw 1..16 dbrp 1..256 dbrp_inc 1 | tdcv 0..63 tdco 0..63 | clock 40000000 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 tso_max_size 65536 tso_max_segs 65535 gro_max_size 65536 parentbus spi parentdev spi0.0 There has been some confusion about the MCP2518FD using a relative or absolute TDCO due to the datasheet specifying a range of [-64,63]. I have a custom board with a 40 MHz clock and an estimated loop delay of 100 to 216 ns. During testing at a data bit rate of 4 Mbit/s I found that using can_get_relative_tdco() resulted in bus-off errors. The final TDCO value was 1 which corresponds to a 10% SSP in an absolute configuration. This behavior is expected if the TDCO value is really absolute and not relative. Using priv->can.tdc.tdco instead results in a final TDCO of 8, setting the SSP at exactly 80%. This configuration works. The automatic, manual, and off TDC modes were tested at speeds up to, and including, 8 Mbit/s on real hardware and behave as expected. Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Reported-by: Kelsey Maes Closes: https://lore.kernel.org/all/C2121586-C87F-4B23-A933-845362C29CA1@vpprocess.com Reviewed-by: Vincent Mailhol Signed-off-by: Kelsey Maes Link: https://patch.msgid.link/20250430161501.79370-1-kelsey@vpprocess.com [mkl: add comment] Signed-off-by: Marc Kleine-Budde --- .../net/can/spi/mcp251xfd/mcp251xfd-core.c | 40 +++++++++++++++---- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 3bc56517fe7a..064d81c724f4 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -75,6 +75,24 @@ static const struct can_bittiming_const mcp251xfd_data_bittiming_const = { .brp_inc = 1, }; +/* The datasheet of the mcp2518fd (DS20006027B) specifies a range of + * [-64,63] for TDCO, indicating a relative TDCO. + * + * Manual tests have shown, that using a relative TDCO configuration + * results in bus off, while an absolute configuration works. + * + * For TDCO use the max value (63) from the data sheet, but 0 as the + * minimum. + */ +static const struct can_tdc_const mcp251xfd_tdc_const = { + .tdcv_min = 0, + .tdcv_max = 63, + .tdco_min = 0, + .tdco_max = 63, + .tdcf_min = 0, + .tdcf_max = 0, +}; + static const char *__mcp251xfd_get_model_str(enum mcp251xfd_model model) { switch (model) { @@ -510,8 +528,7 @@ static int mcp251xfd_set_bittiming(const struct mcp251xfd_priv *priv) { const struct can_bittiming *bt = &priv->can.bittiming; const struct can_bittiming *dbt = &priv->can.data_bittiming; - u32 val = 0; - s8 tdco; + u32 tdcmod, val = 0; int err; /* CAN Control Register @@ -575,11 +592,16 @@ static int mcp251xfd_set_bittiming(const struct mcp251xfd_priv *priv) return err; /* Transmitter Delay Compensation */ - tdco = clamp_t(int, dbt->brp * (dbt->prop_seg + dbt->phase_seg1), - -64, 63); - val = FIELD_PREP(MCP251XFD_REG_TDC_TDCMOD_MASK, - MCP251XFD_REG_TDC_TDCMOD_AUTO) | - FIELD_PREP(MCP251XFD_REG_TDC_TDCO_MASK, tdco); + if (priv->can.ctrlmode & CAN_CTRLMODE_TDC_AUTO) + tdcmod = MCP251XFD_REG_TDC_TDCMOD_AUTO; + else if (priv->can.ctrlmode & CAN_CTRLMODE_TDC_MANUAL) + tdcmod = MCP251XFD_REG_TDC_TDCMOD_MANUAL; + else + tdcmod = MCP251XFD_REG_TDC_TDCMOD_DISABLED; + + val = FIELD_PREP(MCP251XFD_REG_TDC_TDCMOD_MASK, tdcmod) | + FIELD_PREP(MCP251XFD_REG_TDC_TDCV_MASK, priv->can.tdc.tdcv) | + FIELD_PREP(MCP251XFD_REG_TDC_TDCO_MASK, priv->can.tdc.tdco); return regmap_write(priv->map_reg, MCP251XFD_REG_TDC, val); } @@ -2083,10 +2105,12 @@ static int mcp251xfd_probe(struct spi_device *spi) priv->can.do_get_berr_counter = mcp251xfd_get_berr_counter; priv->can.bittiming_const = &mcp251xfd_bittiming_const; priv->can.data_bittiming_const = &mcp251xfd_data_bittiming_const; + priv->can.tdc_const = &mcp251xfd_tdc_const; priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_BERR_REPORTING | CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO | - CAN_CTRLMODE_CC_LEN8_DLC; + CAN_CTRLMODE_CC_LEN8_DLC | CAN_CTRLMODE_TDC_AUTO | + CAN_CTRLMODE_TDC_MANUAL; set_bit(MCP251XFD_FLAGS_DOWN, priv->flags); priv->ndev = ndev; priv->spi = spi; From 84f5eb833f53ae192baed4cfb8d9eaab43481fc9 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:44 +0200 Subject: [PATCH 421/559] can: mcp251xfd: mcp251xfd_remove(): fix order of unregistration calls If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. With the mcp251xfd driver the removal of the module causes the following warning: | WARNING: CPU: 0 PID: 352 at net/core/dev.c:7342 __netif_napi_del_locked+0xc8/0xd8 as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-1-59a9b131589d@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 064d81c724f4..c30b04f8fc0d 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2198,8 +2198,8 @@ static void mcp251xfd_remove(struct spi_device *spi) struct mcp251xfd_priv *priv = spi_get_drvdata(spi); struct net_device *ndev = priv->ndev; - can_rx_offload_del(&priv->offload); mcp251xfd_unregister(priv); + can_rx_offload_del(&priv->offload); spi->max_speed_hz = priv->spi_max_speed_hz_orig; free_candev(ndev); } From db492e24f9b05547ba12b4783f09c9d943cf42fe Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 6 May 2025 13:27:30 +0200 Subject: [PATCH 422/559] block: only update request sector if needed In case of a ZONE APPEND write, regardless of native ZONE APPEND or the emulation layer in the zone write plugging code, the sector the data got written to by the device needs to be updated in the bio. At the moment, this is done for every native ZONE APPEND write and every request that is flagged with 'BIO_ZONE_WRITE_PLUGGING'. But thus superfluously updates the sector for regular writes to a zoned block device. Check if a bio is a native ZONE APPEND write or if the bio is flagged as 'BIO_EMULATES_ZONE_APPEND', meaning the block layer's zone write plugging code handles the ZONE APPEND and translates it into a regular write and back. Only if one of these two criterion is met, update the sector in the bio upon completion. Signed-off-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/dea089581cb6b777c1cd1500b38ac0b61df4b2d1.1746530748.git.jth@kernel.org Signed-off-by: Jens Axboe --- block/blk.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk.h b/block/blk.h index 328075787814..594eeba7b949 100644 --- a/block/blk.h +++ b/block/blk.h @@ -480,7 +480,8 @@ static inline void blk_zone_update_request_bio(struct request *rq, * the original BIO sector so that blk_zone_write_plug_bio_endio() can * lookup the zone write plug. */ - if (req_op(rq) == REQ_OP_ZONE_APPEND || bio_zone_write_plugging(bio)) + if (req_op(rq) == REQ_OP_ZONE_APPEND || + bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) bio->bi_iter.bi_sector = rq->__sector; } void blk_zone_write_plug_bio_endio(struct bio *bio); From 037ada7a3181300218e4fd78bef6a741cfa7f808 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:45 +0200 Subject: [PATCH 423/559] can: rockchip_canfd: rkcanfd_remove(): fix order of unregistration calls If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. The removal of the module causes a warning, as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: ff60bfbaf67f ("can: rockchip_canfd: add driver for Rockchip CAN-FD controller") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-2-59a9b131589d@pengutronix.de Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rockchip/rockchip_canfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/rockchip/rockchip_canfd-core.c b/drivers/net/can/rockchip/rockchip_canfd-core.c index 7107a37da36c..c3fb3176ce42 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-core.c +++ b/drivers/net/can/rockchip/rockchip_canfd-core.c @@ -937,8 +937,8 @@ static void rkcanfd_remove(struct platform_device *pdev) struct rkcanfd_priv *priv = platform_get_drvdata(pdev); struct net_device *ndev = priv->ndev; - can_rx_offload_del(&priv->offload); rkcanfd_unregister(priv); + can_rx_offload_del(&priv->offload); free_candev(ndev); } From 0713a1b3276b98c7dafbeefef00d7bc3a9119a84 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:46 +0200 Subject: [PATCH 424/559] can: mcan: m_can_class_unregister(): fix order of unregistration calls If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. The removal of the module causes a warning, as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: 1be37d3b0414 ("can: m_can: fix periph RX path: use rx-offload to ensure skbs are sent from softirq context") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-3-59a9b131589d@pengutronix.de Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 326ede9d400f..c2c116ce1087 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2463,9 +2463,9 @@ EXPORT_SYMBOL_GPL(m_can_class_register); void m_can_class_unregister(struct m_can_classdev *cdev) { + unregister_candev(cdev->net); if (cdev->is_peripheral) can_rx_offload_del(&cdev->offload); - unregister_candev(cdev->net); } EXPORT_SYMBOL_GPL(m_can_class_unregister); From 511e64e13d8cc72853275832e3f372607466c18c Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 29 Apr 2025 09:05:55 +0200 Subject: [PATCH 425/559] can: gw: fix RCU/BH usage in cgw_create_job() As reported by Sebastian Andrzej Siewior the use of local_bh_disable() is only feasible in uni processor systems to update the modification rules. The usual use-case to update the modification rules is to update the data of the modifications but not the modification types (AND/OR/XOR/SET) or the checksum functions itself. To omit additional memory allocations to maintain fast modification switching times, the modification description space is doubled at gw-job creation time so that only the reference to the active modification description is changed under rcu protection. Rename cgw_job::mod to cf_mod and make it a RCU pointer. Allocate in cgw_create_job() and free it together with cgw_job in cgw_job_free_rcu(). Update all users to dereference cgw_job::cf_mod with a RCU accessor and if possible once. [bigeasy: Replace mod1/mod2 from the Oliver's original patch with dynamic allocation, use RCU annotation and accessor] Reported-by: Sebastian Andrzej Siewior Closes: https://lore.kernel.org/linux-can/20231031112349.y0aLoBrz@linutronix.de/ Fixes: dd895d7f21b2 ("can: cangw: introduce optional uid to reference created routing jobs") Tested-by: Oliver Hartkopp Signed-off-by: Oliver Hartkopp Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250429070555.cs-7b_eZ@linutronix.de Signed-off-by: Marc Kleine-Budde --- net/can/gw.c | 151 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 91 insertions(+), 60 deletions(-) diff --git a/net/can/gw.c b/net/can/gw.c index ef93293c1fae..55eccb1c7620 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -130,7 +130,7 @@ struct cgw_job { u32 handled_frames; u32 dropped_frames; u32 deleted_frames; - struct cf_mod mod; + struct cf_mod __rcu *cf_mod; union { /* CAN frame data source */ struct net_device *dev; @@ -459,6 +459,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) struct cgw_job *gwj = (struct cgw_job *)data; struct canfd_frame *cf; struct sk_buff *nskb; + struct cf_mod *mod; int modidx = 0; /* process strictly Classic CAN or CAN FD frames */ @@ -506,7 +507,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) * When there is at least one modification function activated, * we need to copy the skb as we want to modify skb->data. */ - if (gwj->mod.modfunc[0]) + mod = rcu_dereference(gwj->cf_mod); + if (mod->modfunc[0]) nskb = skb_copy(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); @@ -529,8 +531,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) cf = (struct canfd_frame *)nskb->data; /* perform preprocessed modification functions if there are any */ - while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) - (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); + while (modidx < MAX_MODFUNCTIONS && mod->modfunc[modidx]) + (*mod->modfunc[modidx++])(cf, mod); /* Has the CAN frame been modified? */ if (modidx) { @@ -546,11 +548,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) } /* check for checksum updates */ - if (gwj->mod.csumfunc.crc8) - (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + if (mod->csumfunc.crc8) + (*mod->csumfunc.crc8)(cf, &mod->csum.crc8); - if (gwj->mod.csumfunc.xor) - (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); + if (mod->csumfunc.xor) + (*mod->csumfunc.xor)(cf, &mod->csum.xor); } /* clear the skb timestamp if not configured the other way */ @@ -581,9 +583,20 @@ static void cgw_job_free_rcu(struct rcu_head *rcu_head) { struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu); + /* cgw_job::cf_mod is always accessed from the same cgw_job object within + * the same RCU read section. Once cgw_job is scheduled for removal, + * cf_mod can also be removed without mandating an additional grace period. + */ + kfree(rcu_access_pointer(gwj->cf_mod)); kmem_cache_free(cgw_cache, gwj); } +/* Return cgw_job::cf_mod with RTNL protected section */ +static struct cf_mod *cgw_job_cf_mod(struct cgw_job *gwj) +{ + return rcu_dereference_protected(gwj->cf_mod, rtnl_is_locked()); +} + static int cgw_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { @@ -616,6 +629,7 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, { struct rtcanmsg *rtcan; struct nlmsghdr *nlh; + struct cf_mod *mod; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtcan), flags); if (!nlh) @@ -650,82 +664,83 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + mod = cgw_job_cf_mod(gwj); if (gwj->flags & CGW_FLAGS_CAN_FD) { struct cgw_fdframe_mod mb; - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; + if (mod->modtype.and) { + memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf)); + mb.modtype = mod->modtype.and; if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; + if (mod->modtype.or) { + memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf)); + mb.modtype = mod->modtype.or; if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; + if (mod->modtype.xor) { + memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf)); + mb.modtype = mod->modtype.xor; if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; + if (mod->modtype.set) { + memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf)); + mb.modtype = mod->modtype.set; if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } else { struct cgw_frame_mod mb; - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; + if (mod->modtype.and) { + memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf)); + mb.modtype = mod->modtype.and; if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; + if (mod->modtype.or) { + memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf)); + mb.modtype = mod->modtype.or; if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; + if (mod->modtype.xor) { + memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf)); + mb.modtype = mod->modtype.xor; if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; + if (mod->modtype.set) { + memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf)); + mb.modtype = mod->modtype.set; if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } - if (gwj->mod.uid) { - if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0) + if (mod->uid) { + if (nla_put_u32(skb, CGW_MOD_UID, mod->uid) < 0) goto cancel; } - if (gwj->mod.csumfunc.crc8) { + if (mod->csumfunc.crc8) { if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, - &gwj->mod.csum.crc8) < 0) + &mod->csum.crc8) < 0) goto cancel; } - if (gwj->mod.csumfunc.xor) { + if (mod->csumfunc.xor) { if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN, - &gwj->mod.csum.xor) < 0) + &mod->csum.xor) < 0) goto cancel; } @@ -1059,7 +1074,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct rtcanmsg *r; struct cgw_job *gwj; - struct cf_mod mod; + struct cf_mod *mod; struct can_can_gw ccgw; u8 limhops = 0; int err = 0; @@ -1078,37 +1093,48 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, if (r->gwtype != CGW_TYPE_CAN_CAN) return -EINVAL; - err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); - if (err < 0) - return err; + mod = kmalloc(sizeof(*mod), GFP_KERNEL); + if (!mod) + return -ENOMEM; - if (mod.uid) { + err = cgw_parse_attr(nlh, mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); + if (err < 0) + goto out_free_cf; + + if (mod->uid) { ASSERT_RTNL(); /* check for updating an existing job with identical uid */ hlist_for_each_entry(gwj, &net->can.cgw_list, list) { - if (gwj->mod.uid != mod.uid) + struct cf_mod *old_cf; + + old_cf = cgw_job_cf_mod(gwj); + if (old_cf->uid != mod->uid) continue; /* interfaces & filters must be identical */ - if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) - return -EINVAL; + if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) { + err = -EINVAL; + goto out_free_cf; + } - /* update modifications with disabled softirq & quit */ - local_bh_disable(); - memcpy(&gwj->mod, &mod, sizeof(mod)); - local_bh_enable(); + rcu_assign_pointer(gwj->cf_mod, mod); + kfree_rcu_mightsleep(old_cf); return 0; } } /* ifindex == 0 is not allowed for job creation */ - if (!ccgw.src_idx || !ccgw.dst_idx) - return -ENODEV; + if (!ccgw.src_idx || !ccgw.dst_idx) { + err = -ENODEV; + goto out_free_cf; + } gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); - if (!gwj) - return -ENOMEM; + if (!gwj) { + err = -ENOMEM; + goto out_free_cf; + } gwj->handled_frames = 0; gwj->dropped_frames = 0; @@ -1118,7 +1144,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, gwj->limit_hops = limhops; /* insert already parsed information */ - memcpy(&gwj->mod, &mod, sizeof(mod)); + RCU_INIT_POINTER(gwj->cf_mod, mod); memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw)); err = -ENODEV; @@ -1152,9 +1178,11 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, if (!err) hlist_add_head_rcu(&gwj->list, &net->can.cgw_list); out: - if (err) + if (err) { kmem_cache_free(cgw_cache, gwj); - +out_free_cf: + kfree(mod); + } return err; } @@ -1214,19 +1242,22 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, /* remove only the first matching entry */ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { + struct cf_mod *cf_mod; + if (gwj->flags != r->flags) continue; if (gwj->limit_hops != limhops) continue; + cf_mod = cgw_job_cf_mod(gwj); /* we have a match when uid is enabled and identical */ - if (gwj->mod.uid || mod.uid) { - if (gwj->mod.uid != mod.uid) + if (cf_mod->uid || mod.uid) { + if (cf_mod->uid != mod.uid) continue; } else { /* no uid => check for identical modifications */ - if (memcmp(&gwj->mod, &mod, sizeof(mod))) + if (memcmp(cf_mod, &mod, sizeof(mod))) continue; } From d90b023718a17d308d831fde36b3bb6fa3b511e0 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 4 May 2025 18:26:45 -0500 Subject: [PATCH 426/559] smb3 client: warn when parse contexts returns error on compounded operation Coverity noticed that the rc on smb2_parse_contexts() was not being checked in the case of compounded operations. Since we don't want to stop parsing the following compounded responses which are likely valid, we can't easily error out here, but at least print a warning message if server has a bug causing us to skip parsing the open response contexts. Addresses-Coverity: 1639191 Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/smb2inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 57d9bfbadd97..2a3e46b8e15a 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -666,6 +666,8 @@ finished: /* smb2_parse_contexts() fills idata->fi.IndexNumber */ rc = smb2_parse_contexts(server, &rsp_iov[0], &oparms->fid->epoch, oparms->fid->lease_key, &oplock, &idata->fi, NULL); + if (rc) + cifs_dbg(VFS, "rc: %d parsing context of compound op\n", rc); } for (i = 0; i < num_cmds; i++) { From 42e31f0daf80d9f7bc4ab4000f2795ec3ddf5206 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 6 May 2025 13:10:12 +0200 Subject: [PATCH 427/559] mm,mm_init: Mark set_high_memory as __init set_high_memory() touches arch_zone_lowest_possible_pfn which is marked as __initdata, which creates a section mismatch. Since the only user of the function is free_area_init() which is also marked as __init, mark set_high_memory() as __init as well. Signed-off-by: Oscar Salvador Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202505060901.Qcs06UoB-lkp@intel.com/ Link: https://lore.kernel.org/r/20250506111012.108743-1-osalvador@suse.de Signed-off-by: Mike Rapoport (Microsoft) --- mm/mm_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index 9659689b8ace..327764ca0ee4 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1786,7 +1786,7 @@ static bool arch_has_descending_max_zone_pfns(void) return IS_ENABLED(CONFIG_ARC) && !IS_ENABLED(CONFIG_ARC_HAS_PAE40); } -static void set_high_memory(void) +static void __init set_high_memory(void) { phys_addr_t highmem = memblock_end_of_DRAM(); From c1d9dac0db168198b6f63f460665256dedad9b6e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 2 May 2025 16:40:31 -0600 Subject: [PATCH 428/559] vfio/pci: Align huge faults to order The vfio-pci huge_fault handler doesn't make any attempt to insert a mapping containing the faulting address, it only inserts mappings if the faulting address and resulting pfn are aligned. This works in a lot of cases, particularly in conjunction with QEMU where DMA mappings linearly fault the mmap. However, there are configurations where we don't get that linear faulting and pages are faulted on-demand. The scenario reported in the bug below is such a case, where the physical address width of the CPU is greater than that of the IOMMU, resulting in a VM where guest firmware has mapped device MMIO beyond the address width of the IOMMU. In this configuration, the MMIO is faulted on demand and tracing indicates that occasionally the faults generate a VM_FAULT_OOM. Given the use case, this results in a "error: kvm run failed Bad address", killing the VM. The host is not under memory pressure in this test, therefore it's suspected that VM_FAULT_OOM is actually the result of a NULL return from __pte_offset_map_lock() in the get_locked_pte() path from insert_pfn(). This suggests a potential race inserting a pte concurrent to a pmd, and maybe indicates some deficiency in the mm layer properly handling such a case. Nevertheless, Peter noted the inconsistency of vfio-pci's huge_fault handler where our mapping granularity depends on the alignment of the faulting address relative to the order rather than aligning the faulting address to the order to more consistently insert huge mappings. This change not only uses the page tables more consistently and efficiently, but as any fault to an aligned page results in the same mapping, the race condition suspected in the VM_FAULT_OOM is avoided. Reported-by: Adolfo Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220057 Fixes: 09dfc8a5f2ce ("vfio/pci: Fallback huge faults for unaligned pfn") Cc: stable@vger.kernel.org Tested-by: Adolfo Co-developed-by: Peter Xu Signed-off-by: Peter Xu Link: https://lore.kernel.org/r/20250502224035.3183451-1-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 35f9046af315..6328c3a05bcd 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1646,14 +1646,14 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf, { struct vm_area_struct *vma = vmf->vma; struct vfio_pci_core_device *vdev = vma->vm_private_data; - unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff; + unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1); + unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; + unsigned long pfn = vma_to_pfn(vma) + pgoff; vm_fault_t ret = VM_FAULT_SIGBUS; - pfn = vma_to_pfn(vma) + pgoff; - - if (order && (pfn & ((1 << order) - 1) || - vmf->address & ((PAGE_SIZE << order) - 1) || - vmf->address + (PAGE_SIZE << order) > vma->vm_end)) { + if (order && (addr < vma->vm_start || + addr + (PAGE_SIZE << order) > vma->vm_end || + pfn & ((1 << order) - 1))) { ret = VM_FAULT_FALLBACK; goto out; } From 023c1f2f0609218103cbcb48e0104b144d4a16dc Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Thu, 24 Apr 2025 18:01:42 +0530 Subject: [PATCH 429/559] wifi: cfg80211: fix out-of-bounds access during multi-link element defragmentation Currently during the multi-link element defragmentation process, the multi-link element length added to the total IEs length when calculating the length of remaining IEs after the multi-link element in cfg80211_defrag_mle(). This could lead to out-of-bounds access if the multi-link element or its corresponding fragment elements are the last elements in the IEs buffer. To address this issue, correctly calculate the remaining IEs length by deducting the multi-link element end offset from total IEs end offset. Cc: stable@vger.kernel.org Fixes: 2481b5da9c6b ("wifi: cfg80211: handle BSS data contained in ML probe responses") Signed-off-by: Veerendranath Jakkam Link: https://patch.msgid.link/20250424-fix_mle_defragmentation_oob_access-v1-1-84412a1743fa@quicinc.com Signed-off-by: Johannes Berg --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 9865f305275d..ddd3a97f6609 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2681,7 +2681,7 @@ cfg80211_defrag_mle(const struct element *mle, const u8 *ie, size_t ielen, /* Required length for first defragmentation */ buf_len = mle->datalen - 1; for_each_element(elem, mle->data + mle->datalen, - ielen - sizeof(*mle) + mle->datalen) { + ie + ielen - mle->data - mle->datalen) { if (elem->id != WLAN_EID_FRAGMENT) break; From e12a42f64fc3d74872b349eedd47f90c6676b78a Mon Sep 17 00:00:00 2001 From: Michael-CY Lee Date: Mon, 5 May 2025 16:19:46 +0800 Subject: [PATCH 430/559] wifi: mac80211: fix the type of status_code for negotiated TID to Link Mapping The status code should be type of __le16. Fixes: 83e897a961b8 ("wifi: ieee80211: add definitions for negotiated TID to Link map") Fixes: 8f500fbc6c65 ("wifi: mac80211: process and save negotiated TID to Link mapping request") Signed-off-by: Michael-CY Lee Link: https://patch.msgid.link/20250505081946.3927214-1-michael-cy.lee@mediatek.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- net/mac80211/mlme.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 508d466de1cc..457b4fba88bd 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1526,7 +1526,7 @@ struct ieee80211_mgmt { struct { u8 action_code; u8 dialog_token; - u8 status_code; + __le16 status_code; u8 variable[]; } __packed ttlm_res; struct { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5d1f2d6d09ad..35eaf0812c5b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7675,6 +7675,7 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_res); int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 + 2 * 2 * IEEE80211_TTLM_NUM_TIDS; + u16 status_code; skb = dev_alloc_skb(local->tx_headroom + hdr_len + ttlm_max_len); if (!skb) @@ -7697,19 +7698,18 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, WARN_ON(1); fallthrough; case NEG_TTLM_RES_REJECT: - mgmt->u.action.u.ttlm_res.status_code = - WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING; + status_code = WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING; break; case NEG_TTLM_RES_ACCEPT: - mgmt->u.action.u.ttlm_res.status_code = WLAN_STATUS_SUCCESS; + status_code = WLAN_STATUS_SUCCESS; break; case NEG_TTLM_RES_SUGGEST_PREFERRED: - mgmt->u.action.u.ttlm_res.status_code = - WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED; + status_code = WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED; ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm); break; } + mgmt->u.action.u.ttlm_res.status_code = cpu_to_le16(status_code); ieee80211_tx_skb(sdata, skb); } @@ -7875,7 +7875,7 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, * This can be better implemented in the future, to handle request * rejections. */ - if (mgmt->u.action.u.ttlm_res.status_code != WLAN_STATUS_SUCCESS) + if (le16_to_cpu(mgmt->u.action.u.ttlm_res.status_code) != WLAN_STATUS_SUCCESS) __ieee80211_disconnect(sdata); } From ebedf8b7f05b9c886d68d63025db8d1b12343157 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 May 2025 21:42:59 +0200 Subject: [PATCH 431/559] wifi: iwlwifi: add support for Killer on MTL For now, we need another entry for these devices, this will be changed completely for 6.16. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219926 Link: https://patch.msgid.link/20250506214258.2efbdc9e9a82.I31915ec252bd1c74bd53b89a0e214e42a74b6f2e@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index debeea2b3ae5..00056e76ea3d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -588,6 +588,8 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { IWL_DEV_INFO(0x7A70, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), IWL_DEV_INFO(0x7AF0, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), IWL_DEV_INFO(0x7AF0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), + IWL_DEV_INFO(0x7F70, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), + IWL_DEV_INFO(0x7F70, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), IWL_DEV_INFO(0x271C, 0x0214, iwl9260_2ac_cfg, iwl9260_1_name), IWL_DEV_INFO(0x7E40, 0x1691, iwl_cfg_ma, iwl_ax411_killer_1690s_name), From 19f5ca461d5fc09bdf93a9f8e4bd78ed3a49dc71 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 2 May 2025 16:02:33 +0200 Subject: [PATCH 432/559] objtool/rust: add one more `noreturn` Rust function for Rust 1.87.0 Starting with Rust 1.87.0 (expected 2025-05-15), `objtool` may report: rust/core.o: warning: objtool: _R..._4core9panicking9panic_fmt() falls through to next function _R..._4core9panicking18panic_nounwind_fmt() rust/core.o: warning: objtool: _R..._4core9panicking18panic_nounwind_fmt() falls through to next function _R..._4core9panicking5panic() The reason is that `rust_begin_unwind` is now mangled: _R..._7___rustc17rust_begin_unwind Thus add the mangled one to the list so that `objtool` knows it is actually `noreturn`. See commit 56d680dd23c3 ("objtool/rust: list `noreturn` Rust functions") for more details. Alternatively, we could remove the fixed one in `noreturn.h` and relax this test to cover both, but it seems best to be strict as long as we can. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Cc: Josh Poimboeuf Cc: Peter Zijlstra Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250502140237.1659624-2-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3a411064fa34..b21b12ec88d9 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -227,6 +227,7 @@ static bool is_rust_noreturn(const struct symbol *func) str_ends_with(func->name, "_4core9panicking19assert_failed_inner") || str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") || str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") || + str_ends_with(func->name, "_7___rustc17rust_begin_unwind") || strstr(func->name, "_4core9panicking13assert_failed") || strstr(func->name, "_4core9panicking11panic_const24panic_const_") || (strstr(func->name, "_4core5slice5index24slice_") && From a39f3087092716f2bd531d6fdc20403c3dc2a879 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 2 May 2025 16:02:34 +0200 Subject: [PATCH 433/559] rust: allow Rust 1.87.0's `clippy::ptr_eq` lint Starting with Rust 1.87.0 (expected 2025-05-15) [1], Clippy may expand the `ptr_eq` lint, e.g.: error: use `core::ptr::eq` when comparing raw pointers --> rust/kernel/list.rs:438:12 | 438 | if self.first == item { | ^^^^^^^^^^^^^^^^^^ help: try: `core::ptr::eq(self.first, item)` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#ptr_eq = note: `-D clippy::ptr-eq` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(clippy::ptr_eq)]` It is expected that a PR to relax the lint will be backported [2] by the time Rust 1.87.0 releases, since the lint was considered too eager (at least by default) [3]. Thus allow the lint temporarily just in case. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust-clippy/pull/14339 [1] Link: https://github.com/rust-lang/rust-clippy/pull/14526 [2] Link: https://github.com/rust-lang/rust-clippy/issues/14525 [3] Link: https://lore.kernel.org/r/20250502140237.1659624-3-ojeda@kernel.org [ Converted to `allow`s since backport was confirmed. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/alloc/kvec.rs | 3 +++ rust/kernel/list.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index ae9d072741ce..87a71fd40c3c 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -2,6 +2,9 @@ //! Implementation of [`Vec`]. +// May not be needed in Rust 1.87.0 (pending beta backport). +#![allow(clippy::ptr_eq)] + use super::{ allocator::{KVmalloc, Kmalloc, Vmalloc}, layout::ArrayLayout, diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs index a335c3b1ff5e..2054682c5724 100644 --- a/rust/kernel/list.rs +++ b/rust/kernel/list.rs @@ -4,6 +4,9 @@ //! A linked list implementation. +// May not be needed in Rust 1.87.0 (pending beta backport). +#![allow(clippy::ptr_eq)] + use crate::sync::ArcBorrow; use crate::types::Opaque; use core::iter::{DoubleEndedIterator, FusedIterator}; From 7129ea6e242b00938532537da41ddf5fa3e21471 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 2 May 2025 16:02:35 +0200 Subject: [PATCH 434/559] rust: clean Rust 1.88.0's `unnecessary_transmutes` lint Starting with Rust 1.88.0 (expected 2025-06-26) [1][2], `rustc` may introduce a new lint that catches unnecessary transmutes, e.g.: error: unnecessary transmute --> rust/uapi/uapi_generated.rs:23242:18 | 23242 | unsafe { ::core::mem::transmute(self._bitfield_1.get(0usize, 1u8) as u8) } | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: replace this with: `(self._bitfield_1.get(0usize, 1u8) as u8 == 1)` | = note: `-D unnecessary-transmutes` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(unnecessary_transmutes)]` There are a lot of them (at least 300), but luckily they are all in `bindgen`-generated code. Thus clean all up by allowing it there. Since unknown lints trigger a lint itself in older compilers, do it conditionally so that we can keep the `unknown_lints` lint enabled. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust/pull/136083 [1] Link: https://github.com/rust-lang/rust/issues/136067 [2] Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250502140237.1659624-4-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- init/Kconfig | 3 +++ rust/bindings/lib.rs | 1 + rust/uapi/lib.rs | 1 + 3 files changed, 5 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index 63f5974b9fa6..4cdd1049283c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -140,6 +140,9 @@ config LD_CAN_USE_KEEP_IN_OVERLAY config RUSTC_HAS_COERCE_POINTEE def_bool RUSTC_VERSION >= 108400 +config RUSTC_HAS_UNNECESSARY_TRANSMUTES + def_bool RUSTC_VERSION >= 108800 + config PAHOLE_VERSION int default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE)) diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index 014af0d1fc70..a08eb5518cac 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -26,6 +26,7 @@ #[allow(dead_code)] #[allow(clippy::undocumented_unsafe_blocks)] +#[cfg_attr(CONFIG_RUSTC_HAS_UNNECESSARY_TRANSMUTES, allow(unnecessary_transmutes))] mod bindings_raw { // Manual definition for blocklisted types. type __kernel_size_t = usize; diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs index 13495910271f..c98d7a8cde77 100644 --- a/rust/uapi/lib.rs +++ b/rust/uapi/lib.rs @@ -24,6 +24,7 @@ unreachable_pub, unsafe_op_in_unsafe_fn )] +#![cfg_attr(CONFIG_RUSTC_HAS_UNNECESSARY_TRANSMUTES, allow(unnecessary_transmutes))] // Manual definition of blocklisted types. type __kernel_size_t = usize; From c016722fd57551f8a6fcf472c9d2bcf2130ea0ec Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 2 May 2025 16:02:36 +0200 Subject: [PATCH 435/559] rust: clean Rust 1.88.0's warning about `clippy::disallowed_macros` configuration Starting with Rust 1.88.0 (expected 2025-06-26) [1], Clippy may start warning about paths that do not resolve in the `disallowed_macros` configuration: warning: `kernel::dbg` does not refer to an existing macro --> .clippy.toml:10:5 | 10 | { path = "kernel::dbg", reason = "the `dbg!` macro is intended as a debugging tool" }, | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This is a lint we requested at [2], due to the trouble debugging the lint due to false negatives (e.g. [3]), which we use to emulate `clippy::dbg_macro` [4]. See commit 8577c9dca799 ("rust: replace `clippy::dbg_macro` with `disallowed_macros`") for more details. Given the false negatives are not resolved yet, it is expected that Clippy complains about not finding this macro. Thus, until the false negatives are fixed (and, even then, probably we will need to wait for the MSRV to raise enough), use the escape hatch to allow an invalid path. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust-clippy/pull/14397 [1] Link: https://github.com/rust-lang/rust-clippy/issues/11432 [2] Link: https://github.com/rust-lang/rust-clippy/issues/11431 [3] Link: https://github.com/rust-lang/rust-clippy/issues/11303 [4] Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250502140237.1659624-5-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- .clippy.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clippy.toml b/.clippy.toml index 815c94732ed7..137f41d203de 100644 --- a/.clippy.toml +++ b/.clippy.toml @@ -7,5 +7,5 @@ check-private-items = true disallowed-macros = [ # The `clippy::dbg_macro` lint only works with `std::dbg!`, thus we simulate # it here, see: https://github.com/rust-lang/rust-clippy/issues/11303. - { path = "kernel::dbg", reason = "the `dbg!` macro is intended as a debugging tool" }, + { path = "kernel::dbg", reason = "the `dbg!` macro is intended as a debugging tool", allow-invalid = true }, ] From 211dcf77856db64c73e0c3b9ce0c624ec855daca Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 2 May 2025 16:02:37 +0200 Subject: [PATCH 436/559] rust: clean Rust 1.88.0's `clippy::uninlined_format_args` lint Starting with Rust 1.88.0 (expected 2025-06-26) [1], `rustc` may move back the `uninlined_format_args` to `style` from `pedantic` (it was there waiting for rust-analyzer suppotr), and thus we will start to see lints like: warning: variables can be used directly in the `format!` string --> rust/macros/kunit.rs:105:37 | 105 | let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{}", test); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args help: change this to | 105 - let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{}", test); 105 + let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{test}"); There is even a case that is a pure removal: warning: variables can be used directly in the `format!` string --> rust/macros/module.rs:51:13 | 51 | format!("{field}={content}\0", field = field, content = content) | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args help: change this to | 51 - format!("{field}={content}\0", field = field, content = content) 51 + format!("{field}={content}\0") The lints all seem like nice cleanups, thus just apply them. We may want to disable `allow-mixed-uninlined-format-args` in the future. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust-clippy/pull/14160 [1] Acked-by: Benno Lossin Reviewed-by: Tamir Duberstein Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250502140237.1659624-6-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- drivers/gpu/nova-core/gpu.rs | 2 +- rust/kernel/str.rs | 46 +++++++++++------------ rust/macros/kunit.rs | 13 ++----- rust/macros/module.rs | 19 +++------- rust/macros/paste.rs | 2 +- rust/pin-init/internal/src/pinned_drop.rs | 3 +- 6 files changed, 35 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 17c9660da450..ab0e5a72a059 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -93,7 +93,7 @@ impl Chipset { // For now, redirect to fmt::Debug for convenience. impl fmt::Display for Chipset { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{:?}", self) + write!(f, "{self:?}") } } diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index 878111cb77bc..fb61ce81ea28 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -73,7 +73,7 @@ impl fmt::Display for BStr { b'\r' => f.write_str("\\r")?, // Printable characters. 0x20..=0x7e => f.write_char(b as char)?, - _ => write!(f, "\\x{:02x}", b)?, + _ => write!(f, "\\x{b:02x}")?, } } Ok(()) @@ -109,7 +109,7 @@ impl fmt::Debug for BStr { b'\\' => f.write_str("\\\\")?, // Printable characters. 0x20..=0x7e => f.write_char(b as char)?, - _ => write!(f, "\\x{:02x}", b)?, + _ => write!(f, "\\x{b:02x}")?, } } f.write_char('"') @@ -447,7 +447,7 @@ impl fmt::Display for CStr { // Printable character. f.write_char(c as char)?; } else { - write!(f, "\\x{:02x}", c)?; + write!(f, "\\x{c:02x}")?; } } Ok(()) @@ -479,7 +479,7 @@ impl fmt::Debug for CStr { // Printable characters. b'\"' => f.write_str("\\\"")?, 0x20..=0x7e => f.write_char(c as char)?, - _ => write!(f, "\\x{:02x}", c)?, + _ => write!(f, "\\x{c:02x}")?, } } f.write_str("\"") @@ -641,13 +641,13 @@ mod tests { #[test] fn test_cstr_display() { let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap(); - assert_eq!(format!("{}", hello_world), "hello, world!"); + assert_eq!(format!("{hello_world}"), "hello, world!"); let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap(); - assert_eq!(format!("{}", non_printables), "\\x01\\x09\\x0a"); + assert_eq!(format!("{non_printables}"), "\\x01\\x09\\x0a"); let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap(); - assert_eq!(format!("{}", non_ascii), "d\\xe9j\\xe0 vu"); + assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap(); - assert_eq!(format!("{}", good_bytes), "\\xf0\\x9f\\xa6\\x80"); + assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); } #[test] @@ -658,47 +658,47 @@ mod tests { bytes[i as usize] = i.wrapping_add(1); } let cstr = CStr::from_bytes_with_nul(&bytes).unwrap(); - assert_eq!(format!("{}", cstr), ALL_ASCII_CHARS); + assert_eq!(format!("{cstr}"), ALL_ASCII_CHARS); } #[test] fn test_cstr_debug() { let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap(); - assert_eq!(format!("{:?}", hello_world), "\"hello, world!\""); + assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap(); - assert_eq!(format!("{:?}", non_printables), "\"\\x01\\x09\\x0a\""); + assert_eq!(format!("{non_printables:?}"), "\"\\x01\\x09\\x0a\""); let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap(); - assert_eq!(format!("{:?}", non_ascii), "\"d\\xe9j\\xe0 vu\""); + assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap(); - assert_eq!(format!("{:?}", good_bytes), "\"\\xf0\\x9f\\xa6\\x80\""); + assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); } #[test] fn test_bstr_display() { let hello_world = BStr::from_bytes(b"hello, world!"); - assert_eq!(format!("{}", hello_world), "hello, world!"); + assert_eq!(format!("{hello_world}"), "hello, world!"); let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); - assert_eq!(format!("{}", escapes), "_\\t_\\n_\\r_\\_'_\"_"); + assert_eq!(format!("{escapes}"), "_\\t_\\n_\\r_\\_'_\"_"); let others = BStr::from_bytes(b"\x01"); - assert_eq!(format!("{}", others), "\\x01"); + assert_eq!(format!("{others}"), "\\x01"); let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); - assert_eq!(format!("{}", non_ascii), "d\\xe9j\\xe0 vu"); + assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); - assert_eq!(format!("{}", good_bytes), "\\xf0\\x9f\\xa6\\x80"); + assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); } #[test] fn test_bstr_debug() { let hello_world = BStr::from_bytes(b"hello, world!"); - assert_eq!(format!("{:?}", hello_world), "\"hello, world!\""); + assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); - assert_eq!(format!("{:?}", escapes), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\""); + assert_eq!(format!("{escapes:?}"), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\""); let others = BStr::from_bytes(b"\x01"); - assert_eq!(format!("{:?}", others), "\"\\x01\""); + assert_eq!(format!("{others:?}"), "\"\\x01\""); let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); - assert_eq!(format!("{:?}", non_ascii), "\"d\\xe9j\\xe0 vu\""); + assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); - assert_eq!(format!("{:?}", good_bytes), "\"\\xf0\\x9f\\xa6\\x80\""); + assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); } } diff --git a/rust/macros/kunit.rs b/rust/macros/kunit.rs index 4f553ecf40c0..99ccac82edde 100644 --- a/rust/macros/kunit.rs +++ b/rust/macros/kunit.rs @@ -15,10 +15,7 @@ pub(crate) fn kunit_tests(attr: TokenStream, ts: TokenStream) -> TokenStream { } if attr.len() > 255 { - panic!( - "The test suite name `{}` exceeds the maximum length of 255 bytes", - attr - ) + panic!("The test suite name `{attr}` exceeds the maximum length of 255 bytes") } let mut tokens: Vec<_> = ts.into_iter().collect(); @@ -102,16 +99,14 @@ pub(crate) fn kunit_tests(attr: TokenStream, ts: TokenStream) -> TokenStream { let mut kunit_macros = "".to_owned(); let mut test_cases = "".to_owned(); for test in &tests { - let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{}", test); + let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{test}"); let kunit_wrapper = format!( - "unsafe extern \"C\" fn {}(_test: *mut kernel::bindings::kunit) {{ {}(); }}", - kunit_wrapper_fn_name, test + "unsafe extern \"C\" fn {kunit_wrapper_fn_name}(_test: *mut kernel::bindings::kunit) {{ {test}(); }}" ); writeln!(kunit_macros, "{kunit_wrapper}").unwrap(); writeln!( test_cases, - " kernel::kunit::kunit_case(kernel::c_str!(\"{}\"), {}),", - test, kunit_wrapper_fn_name + " kernel::kunit::kunit_case(kernel::c_str!(\"{test}\"), {kunit_wrapper_fn_name})," ) .unwrap(); } diff --git a/rust/macros/module.rs b/rust/macros/module.rs index a9418fbc9b44..2f66107847f7 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -48,7 +48,7 @@ impl<'a> ModInfoBuilder<'a> { ) } else { // Loadable modules' modinfo strings go as-is. - format!("{field}={content}\0", field = field, content = content) + format!("{field}={content}\0") }; write!( @@ -126,10 +126,7 @@ impl ModuleInfo { }; if seen_keys.contains(&key) { - panic!( - "Duplicated key \"{}\". Keys can only be specified once.", - key - ); + panic!("Duplicated key \"{key}\". Keys can only be specified once."); } assert_eq!(expect_punct(it), ':'); @@ -143,10 +140,7 @@ impl ModuleInfo { "license" => info.license = expect_string_ascii(it), "alias" => info.alias = Some(expect_string_array(it)), "firmware" => info.firmware = Some(expect_string_array(it)), - _ => panic!( - "Unknown key \"{}\". Valid keys are: {:?}.", - key, EXPECTED_KEYS - ), + _ => panic!("Unknown key \"{key}\". Valid keys are: {EXPECTED_KEYS:?}."), } assert_eq!(expect_punct(it), ','); @@ -158,7 +152,7 @@ impl ModuleInfo { for key in REQUIRED_KEYS { if !seen_keys.iter().any(|e| e == key) { - panic!("Missing required key \"{}\".", key); + panic!("Missing required key \"{key}\"."); } } @@ -170,10 +164,7 @@ impl ModuleInfo { } if seen_keys != ordered_keys { - panic!( - "Keys are not ordered as expected. Order them like: {:?}.", - ordered_keys - ); + panic!("Keys are not ordered as expected. Order them like: {ordered_keys:?}."); } info diff --git a/rust/macros/paste.rs b/rust/macros/paste.rs index 6529a387673f..cce712d19855 100644 --- a/rust/macros/paste.rs +++ b/rust/macros/paste.rs @@ -50,7 +50,7 @@ fn concat_helper(tokens: &[TokenTree]) -> Vec<(String, Span)> { let tokens = group.stream().into_iter().collect::>(); segments.append(&mut concat_helper(tokens.as_slice())); } - token => panic!("unexpected token in paste segments: {:?}", token), + token => panic!("unexpected token in paste segments: {token:?}"), }; } diff --git a/rust/pin-init/internal/src/pinned_drop.rs b/rust/pin-init/internal/src/pinned_drop.rs index c824dd8b436d..c4ca7a70b726 100644 --- a/rust/pin-init/internal/src/pinned_drop.rs +++ b/rust/pin-init/internal/src/pinned_drop.rs @@ -28,8 +28,7 @@ pub(crate) fn pinned_drop(_args: TokenStream, input: TokenStream) -> TokenStream // Found the end of the generics, this should be `PinnedDrop`. assert!( matches!(tt, TokenTree::Ident(i) if i.to_string() == "PinnedDrop"), - "expected 'PinnedDrop', found: '{:?}'", - tt + "expected 'PinnedDrop', found: '{tt:?}'" ); pinned_drop_idx = Some(i); break; From 5595c31c370957aabe739ac3996aedba8267603f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Anikiel?= Date: Thu, 10 Apr 2025 11:54:20 +0000 Subject: [PATCH 437/559] x86/Kconfig: make CFI_AUTO_DEFAULT depend on !RUST or Rust >= 1.88 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling core::fmt::write() from rust code while FineIBT is enabled results in a kernel panic: [ 4614.199779] kernel BUG at arch/x86/kernel/cet.c:132! [ 4614.205343] Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 4614.211781] CPU: 2 UID: 0 PID: 6057 Comm: dmabuf_dump Tainted: G U O 6.12.17-android16-0-g6ab38c534a43 #1 9da040f27673ec3945e23b998a0f8bd64c846599 [ 4614.227832] Tainted: [U]=USER, [O]=OOT_MODULE [ 4614.241247] RIP: 0010:do_kernel_cp_fault+0xea/0xf0 ... [ 4614.398144] RIP: 0010:_RNvXs5_NtNtNtCs3o2tGsuHyou_4core3fmt3num3impyNtB9_7Display3fmt+0x0/0x20 [ 4614.407792] Code: 48 f7 df 48 0f 48 f9 48 89 f2 89 c6 5d e9 18 fd ff ff 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 41 81 ea 14 61 af 2c 74 03 0f 0b 90 <66> 0f 1f 00 55 48 89 e5 48 89 f2 48 8b 3f be 01 00 00 00 5d e9 e7 [ 4614.428775] RSP: 0018:ffffb95acfa4ba68 EFLAGS: 00010246 [ 4614.434609] RAX: 0000000000000000 RBX: 0000000000000010 RCX: 0000000000000000 [ 4614.442587] RDX: 0000000000000007 RSI: ffffb95acfa4ba70 RDI: ffffb95acfa4bc88 [ 4614.450557] RBP: ffffb95acfa4bae0 R08: ffff0a00ffffff05 R09: 0000000000000070 [ 4614.458527] R10: 0000000000000000 R11: ffffffffab67eaf0 R12: ffffb95acfa4bcc8 [ 4614.466493] R13: ffffffffac5d50f0 R14: 0000000000000000 R15: 0000000000000000 [ 4614.474473] ? __cfi__RNvXs5_NtNtNtCs3o2tGsuHyou_4core3fmt3num3impyNtB9_7Display3fmt+0x10/0x10 [ 4614.484118] ? _RNvNtCs3o2tGsuHyou_4core3fmt5write+0x1d2/0x250 This happens because core::fmt::write() calls core::fmt::rt::Argument::fmt(), which currently has CFI disabled: library/core/src/fmt/rt.rs: 171 // FIXME: Transmuting formatter in new and indirectly branching to/calling 172 // it here is an explicit CFI violation. 173 #[allow(inline_no_sanitize)] 174 #[no_sanitize(cfi, kcfi)] 175 #[inline] 176 pub(super) unsafe fn fmt(&self, f: &mut Formatter<'_>) -> Result { This causes a Control Protection exception, because FineIBT has sealed off the original function's endbr64. This makes rust currently incompatible with FineIBT. Add a Kconfig dependency that prevents FineIBT from getting turned on by default if rust is enabled. [ Rust 1.88.0 (scheduled for 2025-06-26) should have this fixed [1], and thus we relaxed the condition with Rust >= 1.88. When `objtool` lands checking for this with e.g. [2], the plan is to ideally run that in upstream Rust's CI to prevent regressions early [3], since we do not control `core`'s source code. Alice tested the Rust PR backported to an older compiler. Peter would like that Rust provides a stable `core` which can be pulled into the kernel: "Relying on that much out of tree code is 'unfortunate'". - Miguel ] Signed-off-by: Paweł Anikiel Reviewed-by: Alice Ryhl Acked-by: Peter Zijlstra Link: https://github.com/rust-lang/rust/pull/139632 [1] Link: https://lore.kernel.org/rust-for-linux/20250410154556.GB9003@noisy.programming.kicks-ass.net/ [2] Link: https://github.com/rust-lang/rust/pull/139632#issuecomment-2801950873 [3] Link: https://lore.kernel.org/r/20250410115420.366349-1-panikiel@google.com Link: https://lore.kernel.org/r/att0-CANiq72kjDM0cKALVy4POEzhfdT4nO7tqz0Pm7xM+3=_0+L1t=A@mail.gmail.com [ Reduced splat. - Miguel ] Signed-off-by: Miguel Ojeda --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4b9f378e05f6..5873c9e39919 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2368,6 +2368,7 @@ config STRICT_SIGALTSTACK_SIZE config CFI_AUTO_DEFAULT bool "Attempt to use FineIBT by default at boot time" depends on FINEIBT + depends on !RUST || RUSTC_VERSION >= 108800 default y help Attempt to use FineIBT by default at boot time. If enabled, From 0093cb194a7511d1e68865fa35b763c72e44c2f0 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Mon, 5 May 2025 09:19:38 -0700 Subject: [PATCH 438/559] ice: use DSN instead of PCI BDF for ice_adapter index Use Device Serial Number instead of PCI bus/device/function for the index of struct ice_adapter. Functions on the same physical device should point to the very same ice_adapter instance, but with two PFs, when at least one of them is PCI-e passed-through to a VM, it is no longer the case - PFs will get seemingly random PCI BDF values, and thus indices, what finally leds to each of them being on their own instance of ice_adapter. That causes them to don't attempt any synchronization of the PTP HW clock usage, or any other future resources. DSN works nicely in place of the index, as it is "immutable" in terms of virtualization. Fixes: 0e2bddf9e5f9 ("ice: add ice_adapter for shared data across PFs on the same NIC") Suggested-by: Jacob Keller Suggested-by: Jakub Kicinski Suggested-by: Jiri Pirko Reviewed-by: Aleksandr Loktionov Signed-off-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20250505161939.2083581-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_adapter.c | 47 ++++++++------------ drivers/net/ethernet/intel/ice/ice_adapter.h | 6 ++- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c index 01a08cfd0090..66e070095d1b 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.c +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: Copyright Red Hat -#include #include #include #include @@ -14,32 +13,16 @@ static DEFINE_XARRAY(ice_adapters); static DEFINE_MUTEX(ice_adapters_mutex); -/* PCI bus number is 8 bits. Slot is 5 bits. Domain can have the rest. */ -#define INDEX_FIELD_DOMAIN GENMASK(BITS_PER_LONG - 1, 13) -#define INDEX_FIELD_DEV GENMASK(31, 16) -#define INDEX_FIELD_BUS GENMASK(12, 5) -#define INDEX_FIELD_SLOT GENMASK(4, 0) - -static unsigned long ice_adapter_index(const struct pci_dev *pdev) +static unsigned long ice_adapter_index(u64 dsn) { - unsigned int domain = pci_domain_nr(pdev->bus); - - WARN_ON(domain > FIELD_MAX(INDEX_FIELD_DOMAIN)); - - switch (pdev->device) { - case ICE_DEV_ID_E825C_BACKPLANE: - case ICE_DEV_ID_E825C_QSFP: - case ICE_DEV_ID_E825C_SFP: - case ICE_DEV_ID_E825C_SGMII: - return FIELD_PREP(INDEX_FIELD_DEV, pdev->device); - default: - return FIELD_PREP(INDEX_FIELD_DOMAIN, domain) | - FIELD_PREP(INDEX_FIELD_BUS, pdev->bus->number) | - FIELD_PREP(INDEX_FIELD_SLOT, PCI_SLOT(pdev->devfn)); - } +#if BITS_PER_LONG == 64 + return dsn; +#else + return (u32)dsn ^ (u32)(dsn >> 32); +#endif } -static struct ice_adapter *ice_adapter_new(void) +static struct ice_adapter *ice_adapter_new(u64 dsn) { struct ice_adapter *adapter; @@ -47,6 +30,7 @@ static struct ice_adapter *ice_adapter_new(void) if (!adapter) return NULL; + adapter->device_serial_number = dsn; spin_lock_init(&adapter->ptp_gltsyn_time_lock); refcount_set(&adapter->refcount, 1); @@ -77,23 +61,26 @@ static void ice_adapter_free(struct ice_adapter *adapter) * Return: Pointer to ice_adapter on success. * ERR_PTR() on error. -ENOMEM is the only possible error. */ -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); + u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; + unsigned long index; int err; + index = ice_adapter_index(dsn); scoped_guard(mutex, &ice_adapters_mutex) { err = xa_insert(&ice_adapters, index, NULL, GFP_KERNEL); if (err == -EBUSY) { adapter = xa_load(&ice_adapters, index); refcount_inc(&adapter->refcount); + WARN_ON_ONCE(adapter->device_serial_number != dsn); return adapter; } if (err) return ERR_PTR(err); - adapter = ice_adapter_new(); + adapter = ice_adapter_new(dsn); if (!adapter) return ERR_PTR(-ENOMEM); xa_store(&ice_adapters, index, adapter, GFP_KERNEL); @@ -110,11 +97,13 @@ struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) * * Context: Process, may sleep. */ -void ice_adapter_put(const struct pci_dev *pdev) +void ice_adapter_put(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); + u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; + unsigned long index; + index = ice_adapter_index(dsn); scoped_guard(mutex, &ice_adapters_mutex) { adapter = xa_load(&ice_adapters, index); if (WARN_ON(!adapter)) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h index e233225848b3..ac15c0d2bc1a 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.h +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h @@ -32,6 +32,7 @@ struct ice_port_list { * @refcount: Reference count. struct ice_pf objects hold the references. * @ctrl_pf: Control PF of the adapter * @ports: Ports list + * @device_serial_number: DSN cached for collision detection on 32bit systems */ struct ice_adapter { refcount_t refcount; @@ -40,9 +41,10 @@ struct ice_adapter { struct ice_pf *ctrl_pf; struct ice_port_list ports; + u64 device_serial_number; }; -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev); -void ice_adapter_put(const struct pci_dev *pdev); +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev); +void ice_adapter_put(struct pci_dev *pdev); #endif /* _ICE_ADAPTER_H */ From 08e9f2d584c4732180edee4cb2dbfa7586d7d5a3 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Mon, 5 May 2025 22:47:13 +0300 Subject: [PATCH 439/559] net: Lock netdevices during dev_shutdown __qdisc_destroy() calls into various qdiscs .destroy() op, which in turn can call .ndo_setup_tc(), which requires the netdev instance lock. This commit extends the critical section in unregister_netdevice_many_notify() to cover dev_shutdown() (and dev_tcx_uninstall() as a side-effect) and acquires the netdev instance lock in __dev_change_net_namespace() for the other dev_shutdown() call. This should now guarantee that for all qdisc ops, the netdev instance lock is held during .ndo_setup_tc(). Fixes: a0527ee2df3f ("net: hold netdev instance lock during qdisc ndo_setup_tc") Signed-off-by: Cosmin Ratiu Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250505194713.1723399-1-cratiu@nvidia.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1be7cb73a602..92e004c354ea 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11966,9 +11966,9 @@ void unregister_netdevice_many_notify(struct list_head *head, struct sk_buff *skb = NULL; /* Shutdown queueing discipline. */ + netdev_lock_ops(dev); dev_shutdown(dev); dev_tcx_uninstall(dev); - netdev_lock_ops(dev); dev_xdp_uninstall(dev); dev_memory_provider_uninstall(dev); netdev_unlock_ops(dev); @@ -12161,7 +12161,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, synchronize_net(); /* Shutdown queueing discipline. */ + netdev_lock_ops(dev); dev_shutdown(dev); + netdev_unlock_ops(dev); /* Notify protocols, that we are about to destroy * this device. They should clean all the things. From 35076d2223c731f7be75af61e67f90807384d030 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 6 May 2025 18:18:50 +0800 Subject: [PATCH 440/559] erofs: ensure the extra temporary copy is valid for shortened bvecs When compressed data deduplication is enabled, multiple logical extents may reference the same compressed physical cluster. The previous commit 94c43de73521 ("erofs: fix wrong primary bvec selection on deduplicated extents") already avoids using shortened bvecs. However, in such cases, the extra temporary buffers also need to be preserved for later use in z_erofs_fill_other_copies() to to prevent data corruption. IOWs, extra temporary buffers have to be retained not only due to varying start relative offsets (`pageofs_out`, as indicated by `pcl->multibases`) but also because of shortened bvecs. android.hardware.graphics.composer@2.1.so : 270696 bytes 0: 0.. 204185 | 204185 : 628019200.. 628084736 | 65536 -> 1: 204185.. 225536 | 21351 : 544063488.. 544129024 | 65536 2: 225536.. 270696 | 45160 : 0.. 0 | 0 com.android.vndk.v28.apex : 93814897 bytes ... 364: 53869896..54095257 | 225361 : 543997952.. 544063488 | 65536 -> 365: 54095257..54309344 | 214087 : 544063488.. 544129024 | 65536 366: 54309344..54514557 | 205213 : 544129024.. 544194560 | 65536 ... Both 204185 and 54095257 have the same start relative offset of 3481, but the logical page 55 of `android.hardware.graphics.composer@2.1.so` ranges from 225280 to 229632, forming a shortened bvec [225280, 225536) that cannot be used for decompressing the range from 54095257 to 54309344 of `com.android.vndk.v28.apex`. Since `pcl->multibases` is already meaningless, just mark `be->keepxcpy` on demand for simplicity. Again, this issue can only lead to data corruption if `-Ededupe` is on. Fixes: 94c43de73521 ("erofs: fix wrong primary bvec selection on deduplicated extents") Reviewed-by: Hongbo Li Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20250506101850.191506-1-hsiangkao@linux.alibaba.com --- fs/erofs/zdata.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 5c061aaeeb45..b8e6b76c23d5 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -79,9 +79,6 @@ struct z_erofs_pcluster { /* L: whether partial decompression or not */ bool partial; - /* L: indicate several pageofs_outs or not */ - bool multibases; - /* L: whether extra buffer allocations are best-effort */ bool besteffort; @@ -1046,8 +1043,6 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, break; erofs_onlinefolio_split(folio); - if (f->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) - f->pcl->multibases = true; if (f->pcl->length < offset + end - map->m_la) { f->pcl->length = offset + end - map->m_la; f->pcl->pageofs_out = map->m_la & ~PAGE_MASK; @@ -1093,7 +1088,6 @@ struct z_erofs_backend { struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES]; struct super_block *sb; struct z_erofs_pcluster *pcl; - /* pages with the longest decompressed length for deduplication */ struct page **decompressed_pages; /* pages to keep the compressed data */ @@ -1102,6 +1096,8 @@ struct z_erofs_backend { struct list_head decompressed_secondary_bvecs; struct page **pagepool; unsigned int onstack_used, nr_pages; + /* indicate if temporary copies should be preserved for later use */ + bool keepxcpy; }; struct z_erofs_bvec_item { @@ -1112,18 +1108,20 @@ struct z_erofs_bvec_item { static void z_erofs_do_decompressed_bvec(struct z_erofs_backend *be, struct z_erofs_bvec *bvec) { + int poff = bvec->offset + be->pcl->pageofs_out; struct z_erofs_bvec_item *item; - unsigned int pgnr; + struct page **page; - if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK) && - (bvec->end == PAGE_SIZE || - bvec->offset + bvec->end == be->pcl->length)) { - pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT; - DBG_BUGON(pgnr >= be->nr_pages); - if (!be->decompressed_pages[pgnr]) { - be->decompressed_pages[pgnr] = bvec->page; + if (!(poff & ~PAGE_MASK) && (bvec->end == PAGE_SIZE || + bvec->offset + bvec->end == be->pcl->length)) { + DBG_BUGON((poff >> PAGE_SHIFT) >= be->nr_pages); + page = be->decompressed_pages + (poff >> PAGE_SHIFT); + if (!*page) { + *page = bvec->page; return; } + } else { + be->keepxcpy = true; } /* (cold path) one pcluster is requested multiple times */ @@ -1289,7 +1287,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) .alg = pcl->algorithmformat, .inplace_io = overlapped, .partial_decoding = pcl->partial, - .fillgaps = pcl->multibases, + .fillgaps = be->keepxcpy, .gfp = pcl->besteffort ? GFP_KERNEL : GFP_NOWAIT | __GFP_NORETRY }, be->pagepool); @@ -1346,7 +1344,6 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) pcl->length = 0; pcl->partial = true; - pcl->multibases = false; pcl->besteffort = false; pcl->bvset.nextpage = NULL; pcl->vcnt = 0; From 78cd408356fe3edbac66598772fd347bf3e32c1f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 5 May 2025 18:19:19 -0700 Subject: [PATCH 441/559] net: add missing instance lock to dev_set_promiscuity Accidentally spotted while trying to understand what else needs to be renamed to netif_ prefix. Most of the calls to dev_set_promiscuity are adjacent to dev_set_allmulti or dev_disable_lro so it should be safe to add the lock. Note that new netif_set_promiscuity is currently unused, the locked paths call __dev_set_promiscuity directly. Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250506011919.2882313-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + net/core/dev.c | 14 +------------- net/core/dev_api.c | 23 +++++++++++++++++++++++ 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2d11d013cabe..7ea022750e4e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4972,6 +4972,7 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); +int netif_set_promiscuity(struct net_device *dev, int inc); int dev_set_promiscuity(struct net_device *dev, int inc); int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); diff --git a/net/core/dev.c b/net/core/dev.c index 92e004c354ea..11da1e272ec2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9193,18 +9193,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) return 0; } -/** - * dev_set_promiscuity - update promiscuity count on a device - * @dev: device - * @inc: modifier - * - * Add or remove promiscuity from a device. While the count in the device - * remains above zero the interface remains promiscuous. Once it hits zero - * the device reverts back to normal filtering operation. A negative inc - * value is used to drop promiscuity on the device. - * Return 0 if successful or a negative errno code on error. - */ -int dev_set_promiscuity(struct net_device *dev, int inc) +int netif_set_promiscuity(struct net_device *dev, int inc) { unsigned int old_flags = dev->flags; int err; @@ -9216,7 +9205,6 @@ int dev_set_promiscuity(struct net_device *dev, int inc) dev_set_rx_mode(dev); return err; } -EXPORT_SYMBOL(dev_set_promiscuity); int netif_set_allmulti(struct net_device *dev, int inc, bool notify) { diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 90898cd540ce..f9a160ab596f 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -267,6 +267,29 @@ void dev_disable_lro(struct net_device *dev) } EXPORT_SYMBOL(dev_disable_lro); +/** + * dev_set_promiscuity() - update promiscuity count on a device + * @dev: device + * @inc: modifier + * + * Add or remove promiscuity from a device. While the count in the device + * remains above zero the interface remains promiscuous. Once it hits zero + * the device reverts back to normal filtering operation. A negative inc + * value is used to drop promiscuity on the device. + * Return 0 if successful or a negative errno code on error. + */ +int dev_set_promiscuity(struct net_device *dev, int inc) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_promiscuity(dev, inc); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_promiscuity); + /** * dev_set_allmulti() - update allmulti count on a device * @dev: device From 650415fca0a97472fdd79725e35152614d1aad76 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Fri, 2 May 2025 10:58:00 +0200 Subject: [PATCH 442/559] nvme: unblock ctrl state transition for firmware update The original nvme subsystem design didn't have a CONNECTING state; the state machine allowed transitions from RESETTING to LIVE directly. With the introduction of nvme fabrics the CONNECTING state was introduce. Over time the nvme-pci started to use the CONNECTING state as well. Eventually, a bug fix for the nvme-fc started to depend that the only valid transition to LIVE was from CONNECTING. Though this change didn't update the firmware update handler which was still depending on RESETTING to LIVE transition. The simplest way to address it for the time being is to switch into CONNECTING state before going to LIVE state. Fixes: d2fe192348f9 ("nvme: only allow entering LIVE from CONNECTING state") Reported-by: Guenter Roeck Signed-off-by: Daniel Wagner Closes: https://lore.kernel.org/all/0134ea15-8d5f-41f7-9e9a-d7e6d82accaa@roeck-us.net Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Guenter Roeck --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index eb6ea8acb3cc..ac53629fce68 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4493,7 +4493,8 @@ static void nvme_fw_act_work(struct work_struct *work) msleep(100); } - if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) + if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING) || + !nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) return; nvme_unquiesce_io_queues(ctrl); From ffea7c73d181db273be8b306be6bc573dfe2257b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 30 Apr 2025 11:59:15 +0100 Subject: [PATCH 443/559] KVM: arm64: Properly save/restore HCRX_EL2 Rather than restoring HCRX_EL2 to a fixed value on vcpu exit, perform a full save/restore of the register, ensuring that we don't lose bits that would have been set at some point in the host kernel lifetime, such as the GCSEn bit. Fixes: ff5181d8a2a82 ("arm64/gcs: Provide basic EL2 setup to allow GCS usage at EL0 and EL1") Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20250430105916.3815157-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/include/hyp/switch.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index b741ea6aefa5..96f625dc7256 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -235,6 +235,8 @@ static inline void __deactivate_traps_mpam(void) static inline void __activate_traps_common(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ write_sysreg(1 << 15, hstr_el2); @@ -245,11 +247,8 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) * EL1 instead of being trapped to EL2. */ if (system_supports_pmuv3()) { - struct kvm_cpu_context *hctxt; - write_sysreg(0, pmselr_el0); - hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); @@ -269,6 +268,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) hcrx &= ~clr; } + ctxt_sys_reg(hctxt, HCRX_EL2) = read_sysreg_s(SYS_HCRX_EL2); write_sysreg_s(hcrx, SYS_HCRX_EL2); } @@ -278,19 +278,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); write_sysreg(0, hstr_el2); if (system_supports_pmuv3()) { - struct kvm_cpu_context *hctxt; - - hctxt = host_data_ptr(host_ctxt); write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); } if (cpus_have_final_cap(ARM64_HAS_HCX)) - write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2); + write_sysreg_s(ctxt_sys_reg(hctxt, HCRX_EL2), SYS_HCRX_EL2); __deactivate_traps_hfgxtr(vcpu); __deactivate_traps_mpam(); From ef296ee98bb19ee2a6dbf76155184c99da0d7c71 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 30 Apr 2025 11:59:16 +0100 Subject: [PATCH 444/559] KVM: arm64: Kill HCRX_HOST_FLAGS HCRX_HOST_FLAGS, like most of these hardcoded setups, are not a good match for options that can be selectively enabled or disabled. Nothing but the early setup is relying on it now, so kill the macro and move the bag of bits where they belong. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20250430105916.3815157-3-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/el2_setup.h | 2 +- arch/arm64/include/asm/kvm_arm.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index ebceaae3c749..d40e427ddad9 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -52,7 +52,7 @@ mrs x0, id_aa64mmfr1_el1 ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 cbz x0, .Lskip_hcrx_\@ - mov_q x0, HCRX_HOST_FLAGS + mov_q x0, (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) /* Enable GCS if supported */ mrs_s x1, SYS_ID_AA64PFR1_EL1 diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index bba4b0e93091..e9c8a581e16f 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -102,7 +102,6 @@ #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO) -#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) #define MPAMHCR_HOST_FLAGS 0 /* TCR_EL2 Registers bits */ From 3949e28786cd0afcd96a46ce6629245203f629e5 Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Thu, 1 May 2025 16:24:50 +0000 Subject: [PATCH 445/559] KVM: arm64: Fix memory check in host_stage2_set_owner_locked() I found this simple bug while preparing some patches for pKVM. AFAICT, it should be harmless (besides crashing the kernel if it was misbehaving) Fixes: e94a7dea2972 ("KVM: arm64: Move host page ownership tracking to the hyp vmemmap") Signed-off-by: Mostafa Saleh Link: https://lore.kernel.org/r/20250501162450.2784043-1-smostafa@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 2a5284f749b4..e80f3ebd3e2a 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -503,7 +503,7 @@ int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) { int ret; - if (!addr_is_memory(addr)) + if (!range_is_memory(addr, addr + size)) return -EPERM; ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, From 42420c50c68f3e95e90de2479464f420602229fc Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 30 Apr 2025 15:26:18 +0200 Subject: [PATCH 446/559] s390/pci: Fix missing check for zpci_create_device() error return The zpci_create_device() function returns an error pointer that needs to be checked before dereferencing it as a struct zpci_dev pointer. Add the missing check in __clp_add() where it was missed when adding the scan_list in the fixed commit. Simply not adding the device to the scan list results in the previous behavior. Cc: stable@vger.kernel.org Fixes: 0467cdde8c43 ("s390/pci: Sort PCI functions prior to creating virtual busses") Signed-off-by: Niklas Schnelle Reviewed-by: Gerd Bayer Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_clp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 9a929bbcc397..241f7251c873 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -428,6 +428,8 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) return; } zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state); + if (IS_ERR(zdev)) + return; list_add_tail(&zdev->entry, scan_list); } From 05a2538f2b48500cf4e8a0a0ce76623cc5bafcf1 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 30 Apr 2025 15:26:19 +0200 Subject: [PATCH 447/559] s390/pci: Fix duplicate pci_dev_put() in disable_slot() when PF has child VFs With commit bcb5d6c76903 ("s390/pci: introduce lock to synchronize state of zpci_dev's") the code to ignore power off of a PF that has child VFs was changed from a direct return to a goto to the unlock and pci_dev_put() section. The change however left the existing pci_dev_put() untouched resulting in a doubple put. This can subsequently cause a use after free if the struct pci_dev is released in an unexpected state. Fix this by removing the extra pci_dev_put(). Cc: stable@vger.kernel.org Fixes: bcb5d6c76903 ("s390/pci: introduce lock to synchronize state of zpci_dev's") Signed-off-by: Niklas Schnelle Reviewed-by: Gerd Bayer Signed-off-by: Heiko Carstens --- drivers/pci/hotplug/s390_pci_hpc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index 055518ee354d..e9e9aaa91770 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -59,7 +59,6 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); if (pdev && pci_num_vf(pdev)) { - pci_dev_put(pdev); rc = -EBUSY; goto out; } From cd9c058489053e172a6654cad82ee936d1b09fab Mon Sep 17 00:00:00 2001 From: John Ernberg Date: Fri, 2 May 2025 11:40:55 +0000 Subject: [PATCH 448/559] xen: swiotlb: Use swiotlb bouncing if kmalloc allocation demands it Xen swiotlb support was missed when the patch set starting with 4ab5f8ec7d71 ("mm/slab: decouple ARCH_KMALLOC_MINALIGN from ARCH_DMA_MINALIGN") was merged. When running Xen on iMX8QXP, a SoC without IOMMU, the effect was that USB transfers ended up corrupted when there was more than one URB inflight at the same time. Add a call to dma_kmalloc_needs_bounce() to make sure that allocations too small for DMA get bounced via swiotlb. Closes: https://lore.kernel.org/linux-usb/ab2776f0-b838-4cf6-a12a-c208eb6aad59@actia.se/ Fixes: 4ab5f8ec7d71 ("mm/slab: decouple ARCH_KMALLOC_MINALIGN from ARCH_DMA_MINALIGN") Cc: stable@kernel.org # v6.5+ Signed-off-by: John Ernberg Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross Message-ID: <20250502114043.1968976-2-john.ernberg@actia.se> --- drivers/xen/swiotlb-xen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 1f65795cf5d7..ef56a2500ed6 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -217,6 +217,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, * buffering it. */ if (dma_capable(dev, dev_addr, size, true) && + !dma_kmalloc_needs_bounce(dev, size, dir) && !range_straddles_page_boundary(phys, size) && !xen_arch_need_swiotlb(dev, phys, dev_addr) && !is_swiotlb_force_bounce(dev)) From 687b2bae0efff9b25e071737d6af5004e6e35af5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 May 2025 07:34:24 -0600 Subject: [PATCH 449/559] io_uring: ensure deferred completions are flushed for multishot Multishot normally uses io_req_post_cqe() to post completions, but when stopping it, it may finish up with a deferred completion. This is fine, except if another multishot event triggers before the deferred completions get flushed. If this occurs, then CQEs may get reordered in the CQ ring, as new multishot completions get posted before the deferred ones are flushed. This can cause confusion on the application side, if strict ordering is required for the use case. When multishot posting via io_req_post_cqe(), flush any pending deferred completions first, if any. Cc: stable@vger.kernel.org # 6.1+ Reported-by: Norman Maurer Reported-by: Christian Mazakas Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 769814d71153..541e65a1eebf 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -848,6 +848,14 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags) struct io_ring_ctx *ctx = req->ctx; bool posted; + /* + * If multishot has already posted deferred completions, ensure that + * those are flushed first before posting this one. If not, CQEs + * could get reordered. + */ + if (!wq_list_empty(&ctx->submit_state.compl_reqs)) + __io_submit_flush_completions(ctx); + lockdep_assert(!io_wq_current_is_worker()); lockdep_assert_held(&ctx->uring_lock); From 90989869baae47ee2aa3bcb6f6eb9fbbe4287958 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Tue, 6 May 2025 16:44:56 -0400 Subject: [PATCH 450/559] xenbus: Allow PVH dom0 a non-local xenstore Make xenbus_init() allow a non-local xenstore for a PVH dom0 - it is currently forced to XS_LOCAL. With Hyperlaunch booting dom0 and a xenstore stubdom, dom0 can be handled as a regular XS_HVM following the late init path. Ideally we'd drop the use of xen_initial_domain() and just check for the event channel instead. However, ARM has a xen,enhanced no-xenstore mode, where the event channel and PFN would both be 0. Retain the xen_initial_domain() check, and use that for an additional check when the event channel is 0. Check the full 64bit HVM_PARAM_STORE_EVTCHN value to catch the off chance that high bits are set for the 32bit event channel. Signed-off-by: Jason Andryuk Change-Id: I5506da42e4c6b8e85079fefb2f193c8de17c7437 Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross Message-ID: <20250506204456.5220-1-jason.andryuk@amd.com> --- drivers/xen/xenbus/xenbus_probe.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 6d32ffb01136..86fe6e779056 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -966,9 +966,15 @@ static int __init xenbus_init(void) if (xen_pv_domain()) xen_store_domain_type = XS_PV; if (xen_hvm_domain()) + { xen_store_domain_type = XS_HVM; - if (xen_hvm_domain() && xen_initial_domain()) - xen_store_domain_type = XS_LOCAL; + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (err) + goto out_error; + xen_store_evtchn = (int)v; + if (!v && xen_initial_domain()) + xen_store_domain_type = XS_LOCAL; + } if (xen_pv_domain() && !xen_start_info->store_evtchn) xen_store_domain_type = XS_LOCAL; if (xen_pv_domain() && xen_start_info->store_evtchn) @@ -987,10 +993,6 @@ static int __init xenbus_init(void) xen_store_interface = gfn_to_virt(xen_store_gfn); break; case XS_HVM: - err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); - if (err) - goto out_error; - xen_store_evtchn = (int)v; err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); if (err) goto out_error; From 1f0304dfd9d217c2f8b04a9ef4b3258a66eedd27 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Tue, 6 May 2025 17:09:33 -0400 Subject: [PATCH 451/559] xenbus: Use kref to track req lifetime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marek reported seeing a NULL pointer fault in the xenbus_thread callstack: BUG: kernel NULL pointer dereference, address: 0000000000000000 RIP: e030:__wake_up_common+0x4c/0x180 Call Trace: __wake_up_common_lock+0x82/0xd0 process_msg+0x18e/0x2f0 xenbus_thread+0x165/0x1c0 process_msg+0x18e is req->cb(req). req->cb is set to xs_wake_up(), a thin wrapper around wake_up(), or xenbus_dev_queue_reply(). It seems like it was xs_wake_up() in this case. It seems like req may have woken up the xs_wait_for_reply(), which kfree()ed the req. When xenbus_thread resumes, it faults on the zero-ed data. Linux Device Drivers 2nd edition states: "Normally, a wake_up call can cause an immediate reschedule to happen, meaning that other processes might run before wake_up returns." ... which would match the behaviour observed. Change to keeping two krefs on each request. One for the caller, and one for xenbus_thread. Each will kref_put() when finished, and the last will free it. This use of kref matches the description in Documentation/core-api/kref.rst Link: https://lore.kernel.org/xen-devel/ZO0WrR5J0xuwDIxW@mail-itl/ Reported-by: Marek Marczykowski-Górecki Fixes: fd8aa9095a95 ("xen: optimize xenbus driver for multiple concurrent xenstore accesses") Cc: stable@vger.kernel.org Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <20250506210935.5607-1-jason.andryuk@amd.com> --- drivers/xen/xenbus/xenbus.h | 2 ++ drivers/xen/xenbus/xenbus_comms.c | 9 ++++----- drivers/xen/xenbus/xenbus_dev_frontend.c | 2 +- drivers/xen/xenbus/xenbus_xs.c | 18 ++++++++++++++++-- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index 13821e7e825e..9ac0427724a3 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -77,6 +77,7 @@ enum xb_req_state { struct xb_req_data { struct list_head list; wait_queue_head_t wq; + struct kref kref; struct xsd_sockmsg msg; uint32_t caller_req_id; enum xsd_sockmsg_type type; @@ -103,6 +104,7 @@ int xb_init_comms(void); void xb_deinit_comms(void); int xs_watch_msg(struct xs_watch_event *event); void xs_request_exit(struct xb_req_data *req); +void xs_free_req(struct kref *kref); int xenbus_match(struct device *_dev, const struct device_driver *_drv); int xenbus_dev_probe(struct device *_dev); diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index e5fda0256feb..82df2da1b880 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -309,8 +309,8 @@ static int process_msg(void) virt_wmb(); req->state = xb_req_state_got_reply; req->cb(req); - } else - kfree(req); + } + kref_put(&req->kref, xs_free_req); } mutex_unlock(&xs_response_mutex); @@ -386,14 +386,13 @@ static int process_writes(void) state.req->msg.type = XS_ERROR; state.req->err = err; list_del(&state.req->list); - if (state.req->state == xb_req_state_aborted) - kfree(state.req); - else { + if (state.req->state != xb_req_state_aborted) { /* write err, then update state */ virt_wmb(); state.req->state = xb_req_state_got_reply; wake_up(&state.req->wq); } + kref_put(&state.req->kref, xs_free_req); mutex_unlock(&xb_write_mutex); diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 46f8916597e5..f5c21ba64df5 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -406,7 +406,7 @@ void xenbus_dev_queue_reply(struct xb_req_data *req) mutex_unlock(&u->reply_mutex); kfree(req->body); - kfree(req); + kref_put(&req->kref, xs_free_req); kref_put(&u->kref, xenbus_file_free); diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index d32c726f7a12..dcf9182c8451 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -112,6 +112,12 @@ static void xs_suspend_exit(void) wake_up_all(&xs_state_enter_wq); } +void xs_free_req(struct kref *kref) +{ + struct xb_req_data *req = container_of(kref, struct xb_req_data, kref); + kfree(req); +} + static uint32_t xs_request_enter(struct xb_req_data *req) { uint32_t rq_id; @@ -237,6 +243,12 @@ static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg) req->caller_req_id = req->msg.req_id; req->msg.req_id = xs_request_enter(req); + /* + * Take 2nd ref. One for this thread, and the second for the + * xenbus_thread. + */ + kref_get(&req->kref); + mutex_lock(&xb_write_mutex); list_add_tail(&req->list, &xb_write_list); notify = list_is_singular(&xb_write_list); @@ -261,8 +273,8 @@ static void *xs_wait_for_reply(struct xb_req_data *req, struct xsd_sockmsg *msg) if (req->state == xb_req_state_queued || req->state == xb_req_state_wait_reply) req->state = xb_req_state_aborted; - else - kfree(req); + + kref_put(&req->kref, xs_free_req); mutex_unlock(&xb_write_mutex); return ret; @@ -291,6 +303,7 @@ int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par) req->cb = xenbus_dev_queue_reply; req->par = par; req->user_req = true; + kref_init(&req->kref); xs_send(req, msg); @@ -319,6 +332,7 @@ static void *xs_talkv(struct xenbus_transaction t, req->num_vecs = num_vecs; req->cb = xs_wake_up; req->user_req = false; + kref_init(&req->kref); msg.req_id = 0; msg.tx_id = t.id; From a6aeb739974ec73e5217c75a7c008a688d3d5cf1 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Wed, 7 May 2025 09:50:44 +0300 Subject: [PATCH 452/559] module: ensure that kobject_put() is safe for module type kobjects In 'lookup_or_create_module_kobject()', an internal kobject is created using 'module_ktype'. So call to 'kobject_put()' on error handling path causes an attempt to use an uninitialized completion pointer in 'module_kobject_release()'. In this scenario, we just want to release kobject without an extra synchronization required for a regular module unloading process, so adding an extra check whether 'complete()' is actually required makes 'kobject_put()' safe. Reported-by: syzbot+7fb8a372e1f6add936dd@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7fb8a372e1f6add936dd Fixes: 942e443127e9 ("module: Fix mod->mkobj.kobj potentially freed too early") Cc: stable@vger.kernel.org Suggested-by: Petr Pavlu Signed-off-by: Dmitry Antipov Link: https://lore.kernel.org/r/20250507065044.86529-1-dmantipov@yandex.ru Signed-off-by: Petr Pavlu --- kernel/params.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/params.c b/kernel/params.c index e668fc90b83e..b92d64161b75 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -943,7 +943,9 @@ struct kset *module_kset; static void module_kobj_release(struct kobject *kobj) { struct module_kobject *mk = to_module_kobject(kobj); - complete(mk->kobj_completion); + + if (mk->kobj_completion) + complete(mk->kobj_completion); } const struct kobj_type module_ktype = { From 3ca02e63edccb78ef3659bebc68579c7224a6ca2 Mon Sep 17 00:00:00 2001 From: Paul Aurich Date: Tue, 6 May 2025 22:28:09 -0700 Subject: [PATCH 453/559] smb: client: Avoid race in open_cached_dir with lease breaks A pre-existing valid cfid returned from find_or_create_cached_dir might race with a lease break, meaning open_cached_dir doesn't consider it valid, and thinks it's newly-constructed. This leaks a dentry reference if the allocation occurs before the queued lease break work runs. Avoid the race by extending holding the cfid_list_lock across find_or_create_cached_dir and when the result is checked. Cc: stable@vger.kernel.org Reviewed-by: Henrique Carvalho Signed-off-by: Paul Aurich Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index fe738623cf1b..240d82c6f908 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -29,7 +29,6 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, { struct cached_fid *cfid; - spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { if (!strcmp(cfid->path, path)) { /* @@ -38,25 +37,20 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, * being deleted due to a lease break. */ if (!cfid->time || !cfid->has_lease) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } kref_get(&cfid->refcount); - spin_unlock(&cfids->cfid_list_lock); return cfid; } } if (lookup_only) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } if (cfids->num_entries >= max_cached_dirs) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } cfid = init_cached_dir(path); if (cfid == NULL) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } cfid->cfids = cfids; @@ -74,7 +68,6 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, */ cfid->has_lease = true; - spin_unlock(&cfids->cfid_list_lock); return cfid; } @@ -187,8 +180,10 @@ replay_again: if (!utf16_path) return -ENOMEM; + spin_lock(&cfids->cfid_list_lock); cfid = find_or_create_cached_dir(cfids, path, lookup_only, tcon->max_cached_dirs); if (cfid == NULL) { + spin_unlock(&cfids->cfid_list_lock); kfree(utf16_path); return -ENOENT; } @@ -197,7 +192,6 @@ replay_again: * Otherwise, it is either a new entry or laundromat worker removed it * from @cfids->entries. Caller will put last reference if the latter. */ - spin_lock(&cfids->cfid_list_lock); if (cfid->has_lease && cfid->time) { spin_unlock(&cfids->cfid_list_lock); *ret_cfid = cfid; From 473f09f362e5979efbff40c9bbce58892ad39d66 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 6 May 2025 00:22:26 -0400 Subject: [PATCH 454/559] bcachefs: journal_shutdown is EROFS, not EIO We often filter out EROFS errors to avoid log spew after an emergency shutdown - journal_shutdown is just another emergency shutdown error. Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index a615e4852ded..d9ebffa5b3a2 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -269,7 +269,7 @@ x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \ x(BCH_ERR_invalid, invalid_bkey) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ - x(EIO, journal_shutdown) \ + x(EROFS, journal_shutdown) \ x(EIO, journal_flush_err) \ x(EIO, journal_write_err) \ x(EIO, btree_node_read_err) \ From 2fea3aa76e352cd071bee71e5c43da339639b310 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 May 2025 13:50:00 -0400 Subject: [PATCH 455/559] bcachefs: Filter out harmless EROFS error messages These just indicate that we're shutting down. Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index fc396b9fa754..dfdbb9259985 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -784,7 +784,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, goto err; ret = bch2_btree_write_buffer_tryflush(trans); - bch_err_msg(c, ret, "flushing btree write buffer"); + if (!bch2_err_matches(ret, EROFS)) + bch_err_msg(c, ret, "flushing btree write buffer"); if (ret) goto err; From da18dabc3784663a088943e613c36cd17aeb52d3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 May 2025 16:54:25 -0400 Subject: [PATCH 456/559] bcachefs: Ensure superblock gets written when we go ERO When we go emergency read-only, make sure we do a final write_super() to persist counters and error counts - this can be critical for piecing together what fsck was doing. Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 27943082c093..84a37d971ffd 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -377,6 +377,11 @@ void bch2_fs_read_only(struct bch_fs *c) bch_verbose(c, "marking filesystem clean"); bch2_fs_mark_clean(c); } else { + /* Make sure error counts/counters are persisted */ + mutex_lock(&c->sb_lock); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + bch_verbose(c, "done going read-only, filesystem not clean"); } } From 8e4d28036c293241b312b1fceafb32b994f80fcc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 May 2025 13:32:15 -0400 Subject: [PATCH 457/559] bcachefs: Don't aggressively discard the journal We frequently use 'bcachefs list_journal -a' for debugging, as it provides a record of all btree transactions, and a history of what happened. But it's not so useful if we immediately discard journal buckets right after they're no longer dirty. This tweaks journal reclaim to only discard when we're low on space, keeping the journal mostly un-discarded. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_reclaim.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index ea670c3c43d8..976464d8a695 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -266,10 +266,11 @@ out: static bool should_discard_bucket(struct journal *j, struct journal_device *ja) { - bool ret; - spin_lock(&j->lock); - ret = ja->discard_idx != ja->dirty_idx_ondisk; + unsigned min_free = max(4, ja->nr / 8); + + bool ret = bch2_journal_dev_buckets_available(j, ja, journal_space_discarded) < min_free && + ja->discard_idx != ja->dirty_idx_ondisk; spin_unlock(&j->lock); return ret; From f04f03d3e99bc8f89b6af5debf07ff67d961bc23 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 7 May 2025 14:52:55 -0700 Subject: [PATCH 458/559] Input: synaptics - enable SMBus for HP Elitebook 850 G1 The kernel reports that the touchpad for this device can support SMBus mode. Reported-by: jt Link: https://lore.kernel.org/r/iys5dbv3ldddsgobfkxldazxyp54kay4bozzmagga6emy45jop@2ebvuxgaui4u Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 309c360aab55..fdb471060118 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -190,6 +190,7 @@ static const char * const smbus_pnp_ids[] = { "LEN2054", /* E480 */ "LEN2055", /* E580 */ "LEN2068", /* T14 Gen 1 */ + "SYN3003", /* HP EliteBook 850 G1 */ "SYN3015", /* HP EliteBook 840 G2 */ "SYN3052", /* HP EliteBook 840 G4 */ "SYN3221", /* HP 15-ay000 */ From 6d7ea0881000966607772451b789b5fb5766f11d Mon Sep 17 00:00:00 2001 From: Manuel Fombuena Date: Wed, 7 May 2025 12:05:26 -0700 Subject: [PATCH 459/559] Input: synaptics - enable InterTouch on Dynabook Portege X30-D [ 5.989588] psmouse serio1: synaptics: Your touchpad (PNP: TOS0213 PNP0f03) says it can support a different bus. If i2c-hid and hid-rmi are not used, you might want to try setting psmouse.synaptics_intertouch to 1 and report this to linux-input@vger.kernel.org. [ 6.039923] psmouse serio1: synaptics: Touchpad model: 1, fw: 9.32, id: 0x1e2a1, caps: 0xf00223/0x840300/0x12e800/0x52d884, board id: 3322, fw id: 2658004 The board is labelled TM3322. Present on the Toshiba / Dynabook Portege X30-D and possibly others. Confirmed working well with psmouse.synaptics_intertouch=1 and local build. Signed-off-by: Manuel Fombuena Signed-off-by: Aditya Garg Link: https://lore.kernel.org/r/PN3PR01MB9597711E7933A08389FEC31DB888A@PN3PR01MB9597.INDPRD01.PROD.OUTLOOK.COM Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index fdb471060118..9c63e3c31fa0 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -196,6 +196,7 @@ static const char * const smbus_pnp_ids[] = { "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ "SYN3257", /* HP Envy 13-ad105ng */ + "TOS0213", /* Dynabook Portege X30-D */ NULL }; #endif From 47d768b32e644b56901bb4bbbdb1feb01ea86c85 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Wed, 7 May 2025 12:06:32 -0700 Subject: [PATCH 460/559] Input: synaptics - enable InterTouch on Dynabook Portege X30L-G Enable InterTouch mode on Dynabook Portege X30L-G by adding "TOS01f6" to the list of SMBus-enabled variants. Reported-by: Xuntao Chi Tested-by: Xuntao Chi Signed-off-by: Aditya Garg Link: https://lore.kernel.org/r/PN3PR01MB959786E4AC797160CDA93012B888A@PN3PR01MB9597.INDPRD01.PROD.OUTLOOK.COM Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 9c63e3c31fa0..9a1592378c1b 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -196,6 +196,7 @@ static const char * const smbus_pnp_ids[] = { "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ "SYN3257", /* HP Envy 13-ad105ng */ + "TOS01f6", /* Dynabook Portege X30L-G */ "TOS0213", /* Dynabook Portege X30-D */ NULL }; From 2abc698ac77314e0de5b33a6d96a39c5159d88e4 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Wed, 7 May 2025 12:09:00 -0700 Subject: [PATCH 461/559] Input: synaptics - enable InterTouch on TUXEDO InfinityBook Pro 14 v5 Enable InterTouch mode on TUXEDO InfinityBook Pro 14 v5 by adding "SYN1221" to the list of SMBus-enabled variants. Add support for InterTouch on SYN1221 by adding it to the list of SMBus-enabled variants. Reported-by: Matthias Eilert Tested-by: Matthias Eilert Signed-off-by: Aditya Garg Link: https://lore.kernel.org/r/PN3PR01MB9597C033C4BC20EE2A0C4543B888A@PN3PR01MB9597.INDPRD01.PROD.OUTLOOK.COM Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 9a1592378c1b..a582bf6540c5 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -190,6 +190,7 @@ static const char * const smbus_pnp_ids[] = { "LEN2054", /* E480 */ "LEN2055", /* E580 */ "LEN2068", /* T14 Gen 1 */ + "SYN1221", /* TUXEDO InfinityBook Pro 14 v5 */ "SYN3003", /* HP EliteBook 850 G1 */ "SYN3015", /* HP EliteBook 840 G2 */ "SYN3052", /* HP EliteBook 840 G4 */ From a609cb4cc07aa9ab8f50466622814356c06f2c17 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Wed, 7 May 2025 12:12:15 -0700 Subject: [PATCH 462/559] Input: synaptics - enable InterTouch on Dell Precision M3800 Enable InterTouch mode on Dell Precision M3800 by adding "DLL060d" to the list of SMBus-enabled variants. Reported-by: Markus Rathgeb Signed-off-by: Aditya Garg Link: https://lore.kernel.org/r/PN3PR01MB959789DD6D574E16141E5DC4B888A@PN3PR01MB9597.INDPRD01.PROD.OUTLOOK.COM Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index a582bf6540c5..c5c88a75a019 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -164,6 +164,7 @@ static const char * const topbuttonpad_pnp_ids[] = { #ifdef CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS static const char * const smbus_pnp_ids[] = { /* all of the topbuttonpad_pnp_ids are valid, we just add some extras */ + "DLL060d", /* Dell Precision M3800 */ "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ "LEN0049", /* Yoga 11e */ From e34090d7214e0516eb8722aee295cb2507317c07 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 3 May 2025 01:01:18 +0300 Subject: [PATCH 463/559] ipvs: fix uninit-value for saddr in do_output_route4 syzbot reports for uninit-value for the saddr argument [1]. commit 4754957f04f5 ("ipvs: do not use random local source address for tunnels") already implies that the input value of saddr should be ignored but the code is still reading it which can prevent to connect the route. Fix it by changing the argument to ret_saddr. [1] BUG: KMSAN: uninit-value in do_output_route4+0x42c/0x4d0 net/netfilter/ipvs/ip_vs_xmit.c:147 do_output_route4+0x42c/0x4d0 net/netfilter/ipvs/ip_vs_xmit.c:147 __ip_vs_get_out_rt+0x403/0x21d0 net/netfilter/ipvs/ip_vs_xmit.c:330 ip_vs_tunnel_xmit+0x205/0x2380 net/netfilter/ipvs/ip_vs_xmit.c:1136 ip_vs_in_hook+0x1aa5/0x35b0 net/netfilter/ipvs/ip_vs_core.c:2063 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf7/0x400 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] __ip_local_out+0x758/0x7e0 net/ipv4/ip_output.c:118 ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x6a/0x3c0 net/ipv4/ip_output.c:1501 udp_send_skb+0xfda/0x1b70 net/ipv4/udp.c:1195 udp_sendmsg+0x2fe3/0x33c0 net/ipv4/udp.c:1483 inet_sendmsg+0x1fc/0x280 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x267/0x380 net/socket.c:727 ____sys_sendmsg+0x91b/0xda0 net/socket.c:2566 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2620 __sys_sendmmsg+0x41d/0x880 net/socket.c:2702 __compat_sys_sendmmsg net/compat.c:360 [inline] __do_compat_sys_sendmmsg net/compat.c:367 [inline] __se_compat_sys_sendmmsg net/compat.c:364 [inline] __ia32_compat_sys_sendmmsg+0xc8/0x140 net/compat.c:364 ia32_sys_call+0x3ffa/0x41f0 arch/x86/include/generated/asm/syscalls_32.h:346 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/syscall_32.c:306 do_fast_syscall_32+0x38/0x80 arch/x86/entry/syscall_32.c:331 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/syscall_32.c:369 entry_SYSENTER_compat_after_hwframe+0x84/0x8e Uninit was created at: slab_post_alloc_hook mm/slub.c:4167 [inline] slab_alloc_node mm/slub.c:4210 [inline] __kmalloc_cache_noprof+0x8fa/0xe00 mm/slub.c:4367 kmalloc_noprof include/linux/slab.h:905 [inline] ip_vs_dest_dst_alloc net/netfilter/ipvs/ip_vs_xmit.c:61 [inline] __ip_vs_get_out_rt+0x35d/0x21d0 net/netfilter/ipvs/ip_vs_xmit.c:323 ip_vs_tunnel_xmit+0x205/0x2380 net/netfilter/ipvs/ip_vs_xmit.c:1136 ip_vs_in_hook+0x1aa5/0x35b0 net/netfilter/ipvs/ip_vs_core.c:2063 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf7/0x400 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] __ip_local_out+0x758/0x7e0 net/ipv4/ip_output.c:118 ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x6a/0x3c0 net/ipv4/ip_output.c:1501 udp_send_skb+0xfda/0x1b70 net/ipv4/udp.c:1195 udp_sendmsg+0x2fe3/0x33c0 net/ipv4/udp.c:1483 inet_sendmsg+0x1fc/0x280 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x267/0x380 net/socket.c:727 ____sys_sendmsg+0x91b/0xda0 net/socket.c:2566 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2620 __sys_sendmmsg+0x41d/0x880 net/socket.c:2702 __compat_sys_sendmmsg net/compat.c:360 [inline] __do_compat_sys_sendmmsg net/compat.c:367 [inline] __se_compat_sys_sendmmsg net/compat.c:364 [inline] __ia32_compat_sys_sendmmsg+0xc8/0x140 net/compat.c:364 ia32_sys_call+0x3ffa/0x41f0 arch/x86/include/generated/asm/syscalls_32.h:346 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/syscall_32.c:306 do_fast_syscall_32+0x38/0x80 arch/x86/entry/syscall_32.c:331 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/syscall_32.c:369 entry_SYSENTER_compat_after_hwframe+0x84/0x8e CPU: 0 UID: 0 PID: 22408 Comm: syz.4.5165 Not tainted 6.15.0-rc3-syzkaller-00019-gbc3372351d0c #0 PREEMPT(undef) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Reported-by: syzbot+04b9a82855c8aed20860@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68138dfa.050a0220.14dd7d.0017.GAE@google.com/ Fixes: 4754957f04f5 ("ipvs: do not use random local source address for tunnels") Signed-off-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_xmit.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 3313bceb6cc9..014f07740369 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -119,13 +119,12 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) return false; } -/* Get route to daddr, update *saddr, optionally bind route to saddr */ +/* Get route to daddr, optionally bind route to saddr */ static struct rtable *do_output_route4(struct net *net, __be32 daddr, - int rt_mode, __be32 *saddr) + int rt_mode, __be32 *ret_saddr) { struct flowi4 fl4; struct rtable *rt; - bool loop = false; memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; @@ -135,23 +134,17 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, retry: rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { - /* Invalid saddr ? */ - if (PTR_ERR(rt) == -EINVAL && *saddr && - rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { - *saddr = 0; - flowi4_update_output(&fl4, 0, daddr, 0); - goto retry; - } IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); return NULL; - } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { + } + if (rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { ip_rt_put(rt); - *saddr = fl4.saddr; flowi4_update_output(&fl4, 0, daddr, fl4.saddr); - loop = true; + rt_mode = 0; goto retry; } - *saddr = fl4.saddr; + if (ret_saddr) + *ret_saddr = fl4.saddr; return rt; } @@ -344,19 +337,15 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.ip; } else { - __be32 saddr = htonl(INADDR_ANY); - noref = 0; /* For such unconfigured boxes avoid many route lookups * for performance reasons because we do not remember saddr */ rt_mode &= ~IP_VS_RT_MODE_CONNECT; - rt = do_output_route4(net, daddr, rt_mode, &saddr); + rt = do_output_route4(net, daddr, rt_mode, ret_saddr); if (!rt) goto err_unreach; - if (ret_saddr) - *ret_saddr = saddr; } local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0; From 8478a729c0462273188263136880480729e9efca Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 7 May 2025 17:01:59 +0200 Subject: [PATCH 464/559] netfilter: ipset: fix region locking in hash types Region locking introduced in v5.6-rc4 contained three macros to handle the region locks: ahash_bucket_start(), ahash_bucket_end() which gave back the start and end hash bucket values belonging to a given region lock and ahash_region() which should give back the region lock belonging to a given hash bucket. The latter was incorrect which can lead to a race condition between the garbage collector and adding new elements when a hash type of set is defined with timeouts. Fixes: f66ee0410b1c ("netfilter: ipset: Fix "INFO: rcu detected stall in hash_xxx" reports") Reported-by: Kota Toda Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index cf3ce72c3de6..5251524b96af 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -64,7 +64,7 @@ struct hbucket { #define ahash_sizeof_regions(htable_bits) \ (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region)) #define ahash_region(n, htable_bits) \ - ((n) % ahash_numof_locks(htable_bits)) + ((n) / jhash_size(HTABLE_REGION_BITS)) #define ahash_bucket_start(h, htable_bits) \ ((htable_bits) < HTABLE_REGION_BITS ? 0 \ : (h) * jhash_size(HTABLE_REGION_BITS)) From 9984db63742099ee3f3cff35cf71306d10e64356 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 14 Apr 2025 12:56:48 -0400 Subject: [PATCH 465/559] drm/amd/display: Fix invalid context error in dml helper [Why] "BUG: sleeping function called from invalid context" error. after: "drm/amd/display: Protect FPU in dml2_validate()/dml21_validate()" The populate_dml_plane_cfg_from_plane_state() uses the GFP_KERNEL flag for memory allocation, which shouldn't be used in atomic contexts. The allocation is needed only for using another helper function get_scaler_data_for_plane(). [How] Modify helpers to pass a pointer to scaler_data within existing context, eliminating the need for dynamic memory allocation/deallocation and copying. Fixes: 366e77cd4923 ("drm/amd/display: Protect FPU in dml2_validate()/dml21_validate()") Reviewed-by: Aurabindo Pillai Signed-off-by: Roman Li Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit bd3e84bc98f81b44f2c43936bdadc3241d654259) Cc: stable@vger.kernel.org --- .../amd/display/dc/dml2/dml2_translation_helper.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 2061d43b92e1..ab6baf269801 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -973,7 +973,9 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p } } -static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc_state *context, struct scaler_data *out) +static struct scaler_data *get_scaler_data_for_plane( + const struct dc_plane_state *in, + struct dc_state *context) { int i; struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe; @@ -994,7 +996,7 @@ static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc } ASSERT(i < MAX_PIPES); - memcpy(out, &temp_pipe->plane_res.scl_data, sizeof(*out)); + return &temp_pipe->plane_res.scl_data; } static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, @@ -1057,11 +1059,7 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out const struct dc_plane_state *in, struct dc_state *context, const struct soc_bounding_box_st *soc) { - struct scaler_data *scaler_data = kzalloc(sizeof(*scaler_data), GFP_KERNEL); - if (!scaler_data) - return; - - get_scaler_data_for_plane(in, context, scaler_data); + struct scaler_data *scaler_data = get_scaler_data_for_plane(in, context); out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; @@ -1126,8 +1124,6 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out out->DynamicMetadataTransmittedBytes[location] = 0; out->NumberOfCursors[location] = 1; - - kfree(scaler_data); } static unsigned int map_stream_to_dml_display_cfg(const struct dml2_context *dml2, From f1c6be3999d2be2673a51a9be0caf9348e254e52 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Wed, 16 Apr 2025 11:26:54 -0400 Subject: [PATCH 466/559] drm/amd/display: more liberal vmin/vmax update for freesync [Why] FAMS2 expects vmin/vmax to be updated in the case when freesync is off, but supported. But we only update it when freesync is enabled. [How] Change the vsync handler such that dc_stream_adjust_vmin_vmax() its called irrespective of whether freesync is enabled. If freesync is supported, then there is no harm in updating vmin/vmax registers. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3546 Reviewed-by: ChiaHsuan Chung Signed-off-by: Aurabindo Pillai Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit cfb2d41831ee5647a4ae0ea7c24971a92d5dfa0d) Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 536f73131c2d..93dd9e273c65 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -673,15 +673,21 @@ static void dm_crtc_high_irq(void *interrupt_params) spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); if (acrtc->dm_irq_params.stream && - acrtc->dm_irq_params.vrr_params.supported && - acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_VARIABLE) { + acrtc->dm_irq_params.vrr_params.supported) { + bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled; + bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled; + bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state == VRR_STATE_ACTIVE_VARIABLE; + mod_freesync_handle_v_update(adev->dm.freesync_module, acrtc->dm_irq_params.stream, &acrtc->dm_irq_params.vrr_params); - dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc->dm_irq_params.stream, - &acrtc->dm_irq_params.vrr_params.adjust); + /* update vmin_vmax only if freesync is enabled, or only if PSR and REPLAY are disabled */ + if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) { + dc_stream_adjust_vmin_vmax(adev->dm.dc, + acrtc->dm_irq_params.stream, + &acrtc->dm_irq_params.vrr_params.adjust); + } } /* From 2a24755774ef8db33139e2d9a968cc585c6488da Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Wed, 23 Apr 2025 10:02:11 -0600 Subject: [PATCH 467/559] drm/amd/display: Remove unnecessary DC_FP_START/DC_FP_END [WHY & HOW] Remove the unnecessary DC_FP_START/DC_FP_END pair to reduce time in preempt_disable. It also fixes "BUG: sleeping function called from invalid context" error messages because of calling kzalloc with GFP_KERNEL which can sleep. Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Hung Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 94da0735b67b3ada90a3e2e017d306f070306f1d) --- .../gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 2a59cc61ed8c..944650cb13de 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2114,8 +2114,6 @@ static bool dcn32_resource_construct( #define REG_STRUCT dccg_regs dccg_regs_init(); - DC_FP_START(); - ctx->dc_bios->regs = &bios_regs; pool->base.res_cap = &res_cap_dcn32; @@ -2501,14 +2499,10 @@ static bool dcn32_resource_construct( if (ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev) && (dc->config.sdpif_request_limit_words_per_umc == 0)) dc->config.sdpif_request_limit_words_per_umc = 16; - DC_FP_END(); - return true; create_fail: - DC_FP_END(); - dcn32_resource_destruct(pool); return false; From eba692ca3abca258b3214a6e4126afefad1822f0 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Thu, 17 Apr 2025 10:24:29 -0400 Subject: [PATCH 468/559] drm/amd/display: Call FP Protect Before Mode Programming/Mode Support [Why] Memory allocation occurs within dml21_validate() for adding phantom planes. May cause kernel to be tainted due to usage of FP Start. [How] Move FP start from dml21_validate to before mode programming/mode support. Calculations requiring floating point are all done within mode programming or mode support. Reviewed-by: Alvin Lee Signed-off-by: Austin Zheng Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit fe3250f10819b411808ab9ae1d824c5fc9b59170) --- drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 5d16f36ec95c..ed6584535e89 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -234,7 +234,9 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s if (!result) return false; + DC_FP_START(); result = dml2_build_mode_programming(mode_programming); + DC_FP_END(); if (!result) return false; @@ -277,7 +279,9 @@ static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *co mode_support->dml2_instance = dml_init->dml2_instance; dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming; + DC_FP_START(); is_supported = dml2_check_mode_supported(mode_support); + DC_FP_END(); if (!is_supported) return false; @@ -288,16 +292,12 @@ bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml { bool out = false; - DC_FP_START(); - /* Use dml_validate_only for fast_validate path */ if (fast_validate) out = dml21_check_mode_support(in_dc, context, dml_ctx); else out = dml21_mode_check_and_programming(in_dc, context, dml_ctx); - DC_FP_END(); - return out; } From 5a3846648c0523fd850b7f0aec78c0139453ab8b Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Fri, 18 Apr 2025 16:31:59 +0800 Subject: [PATCH 469/559] drm/amd/display: Shift DMUB AUX reply command if necessary [Why] Defined value of dmub AUX reply command field get updated but didn't adjust dm receiving side accordingly. [How] Check the received reply command value to see if it's updated version or not. Adjust it if necessary. Fixes: ead08b95fa50 ("drm/amd/display: Fix race condition in DPIA AUX transfer") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit d5c9ade755a9afa210840708a12a8f44c0d532f4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 93dd9e273c65..d9292da56948 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12758,8 +12758,11 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( goto out; } + payload->reply[0] = adev->dm.dmub_notify->aux_reply.command & 0xF; + if (adev->dm.dmub_notify->aux_reply.command & 0xF0) + /* The reply is stored in the top nibble of the command. */ + payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; if (!payload->write && p_notify->aux_reply.length && (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { From bc70e11b550d37fbd9eaed0f113ba560894f1609 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 16:29:07 +0800 Subject: [PATCH 470/559] drm/amd/display: Fix the checking condition in dmub aux handling [Why & How] Fix the checking condition for detecting AUX_RET_ERROR_PROTOCOL_ERROR. It was wrongly checking by "not equals to" Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 1db6c9e9b62e1a8912f0a281c941099fca678da3) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d9292da56948..68bbe7d57773 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12749,7 +12749,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( * Transient states before tunneling is enabled could * lead to this error. We can ignore this for now. */ - if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) { + if (p_notify->result == AUX_RET_ERROR_PROTOCOL_ERROR) { DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n", payload->address, payload->length, p_notify->result); From 396dc51b3b7ea524bf8061f478332d0039e96d5d Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 16:56:54 +0800 Subject: [PATCH 471/559] drm/amd/display: Remove incorrect checking in dmub aux handler [Why & How] "Request length != reply length" is expected behavior defined in spec. It's not an invalid reply. Besides, replied data handling logic is not designed to be written in amdgpu_dm_process_dmub_aux_transfer_sync(). Remove the incorrectly handling section. Fixes: ead08b95fa50 ("drm/amd/display: Fix race condition in DPIA AUX transfer") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 81b5c6fa62af62fe89ae9576f41aae37830b94cb) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 68bbe7d57773..d63f219de88e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12764,19 +12764,9 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; if (!payload->write && p_notify->aux_reply.length && - (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { - - if (payload->length != p_notify->aux_reply.length) { - DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n", - p_notify->aux_reply.length, - payload->address, payload->length); - *operation_result = AUX_RET_ERROR_INVALID_REPLY; - goto out; - } - + (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); - } /* success */ ret = p_notify->aux_reply.length; From 3924f45d4de7250a603fd7b50379237a6a0e5adf Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 17:50:03 +0800 Subject: [PATCH 472/559] drm/amd/display: Copy AUX read reply data whenever length > 0 [Why] amdgpu_dm_process_dmub_aux_transfer_sync() should return all exact data reply from the sink side. Don't do the analysis job in it. [How] Remove unnecessary check condition AUX_TRANSACTION_REPLY_AUX_ACK. Fixes: ead08b95fa50 ("drm/amd/display: Fix race condition in DPIA AUX transfer") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 9b540e3fe6796fec4fb1344f3be8952fc2f084d4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d63f219de88e..64df8ca448b3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12763,8 +12763,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( /* The reply is stored in the top nibble of the command. */ payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - if (!payload->write && p_notify->aux_reply.length && - (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) + if (!payload->write && p_notify->aux_reply.length) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); From 65924ec69b29296845c7f628112353438e63ea56 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 19:22:14 +0800 Subject: [PATCH 473/559] drm/amd/display: Fix wrong handling for AUX_DEFER case [Why] We incorrectly ack all bytes get written when the reply actually is defer. When it's defer, means sink is not ready for the request. We should retry the request. [How] Only reply all data get written when receive I2C_ACK|AUX_ACK. Otherwise, reply the number of actual written bytes received from the sink. Add some messages to facilitate debugging as well. Fixes: ad6756b4d773 ("drm/amd/display: Shift dc link aux to aux_payload") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 3637e457eb0000bc37d8bbbec95964aad2fb29fd) Cc: stable@vger.kernel.org --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 7ceedf626d23..074b79fd5822 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -51,6 +51,9 @@ #define PEAK_FACTOR_X1000 1006 +/* + * This function handles both native AUX and I2C-Over-AUX transactions. + */ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { @@ -87,15 +90,25 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, if (adev->dm.aux_hpd_discon_quirk) { if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE && operation_result == AUX_RET_ERROR_HPD_DISCON) { - result = 0; + result = msg->size; operation_result = AUX_RET_SUCCESS; } } - if (payload.write && result >= 0) - result = msg->size; + /* + * result equals to 0 includes the cases of AUX_DEFER/I2C_DEFER + */ + if (payload.write && result >= 0) { + if (result) { + /*one byte indicating partially written bytes. Force 0 to retry*/ + drm_info(adev_to_drm(adev), "amdgpu: AUX partially written\n"); + result = 0; + } else if (!payload.reply[0]) + /*I2C_ACK|AUX_ACK*/ + result = msg->size; + } - if (result < 0) + if (result < 0) { switch (operation_result) { case AUX_RET_SUCCESS: break; @@ -114,6 +127,13 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, break; } + drm_info(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); + } + + if (payload.reply[0]) + drm_info(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", + payload.reply[0]); + return result; } From b7e84fb708392b37e5dbb2a95db9b94a0e3f0aa2 Mon Sep 17 00:00:00 2001 From: Ruijing Dong Date: Fri, 2 May 2025 11:19:26 -0400 Subject: [PATCH 474/559] drm/amdgpu/vcn: using separate VCN1_AON_SOC offset VCN1_AON_SOC_ADDRESS_3_0 offset varies on different VCN generations, the issue in vcn4.0.5 is caused by a different VCN1_AON_SOC_ADDRESS_3_0 offset. This patch does the following: 1. use the same offset for other VCN generations. 2. use the vcn4.0.5 special offset 3. update vcn_4_0 and vcn_5_0 Acked-by: Saleemkhan Jamadar Reviewed-by: Leo Liu Signed-off-by: Ruijing Dong Signed-off-by: Alex Deucher (cherry picked from commit 5c89ceda9984498b28716944633a9a01cbb2c90d) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 - drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 4 +++- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 3 ++- 8 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index cdcdae7f71ce..83adf81defc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -66,7 +66,6 @@ #define VCN_ENC_CMD_REG_WAIT 0x0000000c #define VCN_AON_SOC_ADDRESS_2_0 0x1f800 -#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_VID_IP_ADDRESS_2_0 0x0 #define VCN_AON_IP_ADDRESS_2_0 0x30000 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 8e7a36f26e9c..b8d835c9e17e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index d716510b8dd6..3eec1b8feaee 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 22ae1939476f..0b19f0ab4480 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -40,6 +40,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index c6f6392c1c20..1f777c125b00 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_HARVEST_MMSCH 0 @@ -614,7 +615,8 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, /* VCN global tiling registers */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 3e176b4b7c69..012f6ea928ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index ba603b2246e2..a1171e6152ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 (0x48300 + 0x38000) +#define VCN1_AON_SOC_ADDRESS_3_0 (0x48000 + 0x38000) #define VCN_HARVEST_MMSCH 0 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index d99d05f42f1d..b90da3d3e140 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -533,7 +533,8 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, /* VCN global tiling registers */ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); return; } From d0ce1aaa8531a4a4707711cab5721374751c51b0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 1 May 2025 13:00:16 -0400 Subject: [PATCH 475/559] Revert "drm/amd: Stop evicting resources on APUs in suspend" This reverts commit 3a9626c816db901def438dc2513622e281186d39. This breaks S4 because we end up setting the s3/s0ix flags even when we are entering s4 since prepare is used by both flows. The causes both the S3/s0ix and s4 flags to be set which breaks several checks in the driver which assume they are mutually exclusive. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3634 Cc: Mario Limonciello Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit ce8f7d95899c2869b47ea6ce0b3e5bf304b2fff4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 18 ------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++--------- 3 files changed, 2 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ef6e78224fdf..c3641331d4de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1614,11 +1614,9 @@ static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_cap #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); -void amdgpu_choose_low_power_state(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } -static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif void amdgpu_register_gpu_instance(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index b7f8f2ff143d..707e131f89d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1533,22 +1533,4 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) #endif /* CONFIG_AMD_PMC */ } -/** - * amdgpu_choose_low_power_state - * - * @adev: amdgpu_device_pointer - * - * Choose the target low power state for the GPU - */ -void amdgpu_choose_low_power_state(struct amdgpu_device *adev) -{ - if (adev->in_runpm) - return; - - if (amdgpu_acpi_is_s0ix_active(adev)) - adev->in_s0ix = true; - else if (amdgpu_acpi_is_s3_active(adev)) - adev->in_s3 = true; -} - #endif /* CONFIG_SUSPEND */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7f354cd532dc..5ac7bd5942d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4949,15 +4949,13 @@ int amdgpu_device_prepare(struct drm_device *dev) struct amdgpu_device *adev = drm_to_adev(dev); int i, r; - amdgpu_choose_low_power_state(adev); - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - goto unprepare; + return r; flush_delayed_work(&adev->gfx.gfx_off_delay_work); @@ -4968,15 +4966,10 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]); if (r) - goto unprepare; + return r; } return 0; - -unprepare: - adev->in_s0ix = adev->in_s3 = adev->in_s4 = false; - - return r; } /** From 4aaffc85751da5722e858e4333e8cf0aa4b6c78f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 1 May 2025 13:46:46 -0400 Subject: [PATCH 476/559] drm/amdgpu: fix pm notifier handling Set the s3/s0ix and s4 flags in the pm notifier so that we can skip the resource evictions properly in pm prepare based on whether we are suspending or hibernating. Drop the eviction as processes are not frozen at this time, we we can end up getting stuck trying to evict VRAM while applications continue to submit work which causes the buffers to get pulled back into VRAM. v2: Move suspend flags out of pm notifier (Mario) Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4178 Fixes: 2965e6355dcd ("drm/amd: Add Suspend/Hibernate notification callback support") Cc: Mario Limonciello Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 06f2dcc241e7e5c681f81fbc46cacdf4bfd7d6d7) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 +++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 +--------- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5ac7bd5942d0..f8b3e04d71ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4907,28 +4907,20 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) * @data: data * * This function is called when the system is about to suspend or hibernate. - * It is used to evict resources from the device before the system goes to - * sleep while there is still access to swap. + * It is used to set the appropriate flags so that eviction can be optimized + * in the pm prepare callback. */ static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, void *data) { struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb); - int r; switch (mode) { case PM_HIBERNATION_PREPARE: adev->in_s4 = true; - fallthrough; - case PM_SUSPEND_PREPARE: - r = amdgpu_device_evict_resources(adev); - /* - * This is considered non-fatal at this time because - * amdgpu_device_prepare() will also fatally evict resources. - * See https://gitlab.freedesktop.org/drm/amd/-/issues/3781 - */ - if (r) - drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r); + break; + case PM_POST_HIBERNATION: + adev->in_s4 = false; break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 24ee4710f807..72c807f5822e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2615,13 +2615,8 @@ static int amdgpu_pmops_freeze(struct device *dev) static int amdgpu_pmops_thaw(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(drm_dev); - int r; - r = amdgpu_device_resume(drm_dev, true); - adev->in_s4 = false; - - return r; + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_poweroff(struct device *dev) @@ -2634,9 +2629,6 @@ static int amdgpu_pmops_poweroff(struct device *dev) static int amdgpu_pmops_restore(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(drm_dev); - - adev->in_s4 = false; return amdgpu_device_resume(drm_dev, true); } From f690e3974755a650259a45d71456decc9c96a282 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:45:04 -0400 Subject: [PATCH 477/559] drm/amdgpu/hdp4: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: c9b8dcabb52a ("drm/amdgpu/hdp4.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 5c937b4a6050316af37ef214825b6340b5e9e391) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index f1dc13b3ab38..cbbeadeb53f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -41,7 +41,12 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From 6beb6835c1fbb3f676aebb51a5fee6b77fed9308 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 6 May 2025 16:28:54 +0200 Subject: [PATCH 478/559] openvswitch: Fix unsafe attribute parsing in output_userspace() This patch replaces the manual Netlink attribute iteration in output_userspace() with nla_for_each_nested(), which ensures that only well-formed attributes are processed. Fixes: ccb1352e76cf ("net: Add Open vSwitch kernel components.") Signed-off-by: Eelco Chaudron Acked-by: Ilya Maximets Acked-by: Aaron Conole Link: https://patch.msgid.link/0bd65949df61591d9171c0dc13e42cea8941da10.1746541734.git.echaudro@redhat.com Signed-off-by: Jakub Kicinski --- net/openvswitch/actions.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 61fea7baae5d..2f22ca59586f 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -975,8 +975,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.mru = OVS_CB(skb)->mru; - for (a = nla_data(attr), rem = nla_len(attr); rem > 0; - a = nla_next(a, &rem)) { + nla_for_each_nested(a, attr, rem) { switch (nla_type(a)) { case OVS_USERSPACE_ATTR_USERDATA: upcall.userdata = a; From 4a7843cc8a41b9612becccc07715ed017770eb89 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 6 May 2025 18:56:47 +0200 Subject: [PATCH 479/559] net: airoha: Add missing field to ppe_mbox_data struct The official Airoha EN7581 firmware requires adding max_packet field in ppe_mbox_data struct while the unofficial one used to develop the Airoha EN7581 flowtable support does not require this field. This patch does not introduce any real backwards compatible issue since EN7581 fw is not publicly available in linux-firmware or other repositories (e.g. OpenWrt) yet and the official fw version will use this new layout. For this reason this change needs to be backported. Moreover, make explicit the padding added by the compiler introducing the rsv array in init_info struct. At the same time use u32 instead of int for init_info and set_info struct definitions in ppe_mbox_data struct. Fixes: 23290c7bc190d ("net: airoha: Introduce Airoha NPU support") Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250506-airoha-en7581-fix-ppe_mbox_data-v5-1-29cabed6864d@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_npu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c index 7a5710f9ccf6..ead0625e781f 100644 --- a/drivers/net/ethernet/airoha/airoha_npu.c +++ b/drivers/net/ethernet/airoha/airoha_npu.c @@ -104,12 +104,14 @@ struct ppe_mbox_data { u8 xpon_hal_api; u8 wan_xsi; u8 ct_joyme4; - int ppe_type; - int wan_mode; - int wan_sel; + u8 max_packet; + u8 rsv[3]; + u32 ppe_type; + u32 wan_mode; + u32 wan_sel; } init_info; struct { - int func_id; + u32 func_id; u32 size; u32 data; } set_info; From c4327229948879814229b46aa26a750718888503 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 5 May 2025 21:58:04 +0200 Subject: [PATCH 480/559] bpf: Scrub packet on bpf_redirect_peer When bpf_redirect_peer is used to redirect packets to a device in another network namespace, the skb isn't scrubbed. That can lead skb information from one namespace to be "misused" in another namespace. As one example, this is causing Cilium to drop traffic when using bpf_redirect_peer to redirect packets that just went through IPsec decryption to a container namespace. The following pwru trace shows (1) the packet path from the host's XFRM layer to the container's XFRM layer where it's dropped and (2) the number of active skb extensions at each function. NETNS MARK IFACE TUPLE FUNC 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 xfrm_rcv_cb .active_extensions = (__u8)2, 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 xfrm4_rcv_cb .active_extensions = (__u8)2, 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 gro_cells_receive .active_extensions = (__u8)2, [...] 4026533547 0 eth0 10.244.3.124:35473->10.244.2.158:53 skb_do_redirect .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 ip_rcv .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 ip_rcv_core .active_extensions = (__u8)2, [...] 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 udp_queue_rcv_one_skb .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 __xfrm_policy_check .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 __xfrm_decode_session .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 security_xfrm_decode_session .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 kfree_skb_reason(SKB_DROP_REASON_XFRM_POLICY) .active_extensions = (__u8)2, In this case, there are no XFRM policies in the container's network namespace so the drop is unexpected. When we decrypt the IPsec packet, the XFRM state used for decryption is set in the skb extensions. This information is preserved across the netns switch. When we reach the XFRM policy check in the container's netns, __xfrm_policy_check drops the packet with LINUX_MIB_XFRMINNOPOLS because a (container-side) XFRM policy can't be found that matches the (host-side) XFRM state used for decryption. This patch fixes this by scrubbing the packet when using bpf_redirect_peer, as is done on typical netns switches via veth devices except skb->mark and skb->tstamp are not zeroed. Fixes: 9aa1206e8f482 ("bpf: Add redirect_peer helper") Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/1728ead5e0fe45e7a6542c36bd4e3ca07a73b7d6.1746460653.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/filter.c b/net/core/filter.c index 79cab4d78dc3..577a4504e26f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2509,6 +2509,7 @@ int skb_do_redirect(struct sk_buff *skb) goto out_drop; skb->dev = dev; dev_sw_netstats_rx_add(dev, skb->len); + skb_scrub_packet(skb, false); return -EAGAIN; } return flags & BPF_F_NEIGH ? From f5c79ffdc250bc8c90fd4fdf1e5d7ac4647912d5 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 5 May 2025 21:58:39 +0200 Subject: [PATCH 481/559] bpf: Clarify handling of mark and tstamp by redirect_peer When switching network namespaces with the bpf_redirect_peer helper, the skb->mark and skb->tstamp fields are not zeroed out like they can be on a typical netns switch. This patch clarifies that in the helper description. Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/ccc86af26d43c5c0b776bcba2601b7479c0d46d0.1746460653.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/bpf.h | 3 +++ tools/include/uapi/linux/bpf.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 28705ae67784..fd404729b115 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4968,6 +4968,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 28705ae67784..fd404729b115 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4968,6 +4968,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The From e5641daa0ea1932e2b0fa7f27843e712244f6538 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 6 May 2025 16:35:44 +0530 Subject: [PATCH 482/559] net: ti: icssg-prueth: Set XDP feature flags for ndev xdp_features demonstrates what all XDP capabilities are supported on a given network device. The driver needs to set these xdp_features flag to let the network stack know what XDP features a given driver is supporting. These flags need to be set for a given ndev irrespective of any XDP program being loaded or not. Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250506110546.4065715-2-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 443f90fa6557..ee35fecf61e7 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1109,11 +1109,6 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame static int emac_xdp_setup(struct prueth_emac *emac, struct netdev_bpf *bpf) { struct bpf_prog *prog = bpf->prog; - xdp_features_t val; - - val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_NDO_XMIT; - xdp_set_features_flag(emac->ndev, val); if (!emac->xdpi.prog && !prog) return 0; @@ -1291,6 +1286,10 @@ static int prueth_netdev_init(struct prueth *prueth, ndev->hw_features = NETIF_F_SG; ndev->features = ndev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; ndev->hw_features |= NETIF_PRUETH_HSR_OFFLOAD_FEATURES; + xdp_set_features_flag(ndev, + NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT); netif_napi_add(ndev, &emac->napi_rx, icssg_napi_rx_poll); hrtimer_setup(&emac->rx_hrtimer, &emac_rx_timer_callback, CLOCK_MONOTONIC, From 8b3fae3e2376b70b7a76005263bc34d034b9c7bf Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 6 May 2025 16:35:45 +0530 Subject: [PATCH 483/559] net: ti: icssg-prueth: Fix kernel panic during concurrent Tx queue access Add __netif_tx_lock() to ensure that only one packet is being transmitted at a time to avoid race conditions in the netif_txq struct and prevent packet data corruption. Failing to do so causes kernel panic with the following error: [ 2184.746764] ------------[ cut here ]------------ [ 2184.751412] kernel BUG at lib/dynamic_queue_limits.c:99! [ 2184.756728] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP logs: https://gist.github.com/MeghanaMalladiTI/9c7aa5fc3b7fb03f87c74aad487956e9 The lock is acquired before calling emac_xmit_xdp_frame() and released after the call returns. This ensures that the TX queue is protected from concurrent access during the transmission of XDP frames. Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250506110546.4065715-3-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_common.c | 7 ++++++- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index b4be76e13a2f..38a80e004d1c 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -650,6 +650,8 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, struct page *page, u32 *len) { struct net_device *ndev = emac->ndev; + struct netdev_queue *netif_txq; + int cpu = smp_processor_id(); struct bpf_prog *xdp_prog; struct xdp_frame *xdpf; u32 pkt_len = *len; @@ -669,8 +671,11 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, goto drop; } - q_idx = smp_processor_id() % emac->tx_ch_num; + q_idx = cpu % emac->tx_ch_num; + netif_txq = netdev_get_tx_queue(ndev, q_idx); + __netif_tx_lock(netif_txq, cpu); result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx); + __netif_tx_unlock(netif_txq); if (result == ICSSG_XDP_CONSUMED) { ndev->stats.tx_dropped++; goto drop; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index ee35fecf61e7..86fc1278127c 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1075,17 +1075,21 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame { struct prueth_emac *emac = netdev_priv(dev); struct net_device *ndev = emac->ndev; + struct netdev_queue *netif_txq; + int cpu = smp_processor_id(); struct xdp_frame *xdpf; unsigned int q_idx; int nxmit = 0; u32 err; int i; - q_idx = smp_processor_id() % emac->tx_ch_num; + q_idx = cpu % emac->tx_ch_num; + netif_txq = netdev_get_tx_queue(ndev, q_idx); if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; + __netif_tx_lock(netif_txq, cpu); for (i = 0; i < n; i++) { xdpf = frames[i]; err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx); @@ -1095,6 +1099,7 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame } nxmit++; } + __netif_tx_unlock(netif_txq); return nxmit; } From 1884fc85ae6ed0652fa324c66b1d89e25174e73b Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 6 May 2025 16:35:46 +0530 Subject: [PATCH 484/559] net: ti: icssg-prueth: Report BQL before sending XDP packets When sending out any kind of traffic, it is essential that the driver keeps reporting BQL of the number of bytes that have been sent so that BQL can track the amount of data in the queue and prevents it from overflowing. If BQL is not reported, the driver may continue sending packets even when the queue is full, leading to packet loss, congestion and decreased network performance. Currently this is missing in emac_xmit_xdp_frame() and this patch fixes it. Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250506110546.4065715-4-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_common.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 38a80e004d1c..d88a0180294e 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -187,7 +187,6 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, xdp_return_frame(xdpf); break; default: - netdev_err(ndev, "tx_complete: invalid swdata type %d\n", swdata->type); prueth_xmit_free(tx_chn, desc_tx); ndev->stats.tx_dropped++; continue; @@ -567,6 +566,7 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, { struct cppi5_host_desc_t *first_desc; struct net_device *ndev = emac->ndev; + struct netdev_queue *netif_txq; struct prueth_tx_chn *tx_chn; dma_addr_t desc_dma, buf_dma; struct prueth_swdata *swdata; @@ -620,12 +620,17 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, swdata->data.xdpf = xdpf; } + /* Report BQL before sending the packet */ + netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); + netdev_tx_sent_queue(netif_txq, xdpf->len); + cppi5_hdesc_set_pktlen(first_desc, xdpf->len); desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc); ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); if (ret) { netdev_err(ndev, "xdp tx: push failed: %d\n", ret); + netdev_tx_completed_queue(netif_txq, 1, xdpf->len); goto drop_free_descs; } @@ -984,6 +989,7 @@ enum netdev_tx icssg_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); if (ret) { netdev_err(ndev, "tx: push failed: %d\n", ret); + netdev_tx_completed_queue(netif_txq, 1, pkt_len); goto drop_free_descs; } From 5f93185a757ff38b36f849c659aeef368db15a68 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:00 +0200 Subject: [PATCH 485/559] net: dsa: b53: allow leaky reserved multicast Allow reserved multicast to ignore VLAN membership so STP and other management protocols work without a PVID VLAN configured when using a vlan aware bridge. Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-2-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index e5ba71897906..62866165ad03 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -373,9 +373,11 @@ static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, b53_read8(dev, B53_VLAN_PAGE, B53_VLAN_CTRL5, &vc5); } + vc1 &= ~VC1_RX_MCST_FWD_EN; + if (enable) { vc0 |= VC0_VLAN_EN | VC0_VID_CHK_EN | VC0_VID_HASH_VID; - vc1 |= VC1_RX_MCST_UNTAG_EN | VC1_RX_MCST_FWD_EN; + vc1 |= VC1_RX_MCST_UNTAG_EN; vc4 &= ~VC4_ING_VID_CHECK_MASK; if (enable_filtering) { vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S; @@ -393,7 +395,7 @@ static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, } else { vc0 &= ~(VC0_VLAN_EN | VC0_VID_CHK_EN | VC0_VID_HASH_VID); - vc1 &= ~(VC1_RX_MCST_UNTAG_EN | VC1_RX_MCST_FWD_EN); + vc1 &= ~VC1_RX_MCST_UNTAG_EN; vc4 &= ~VC4_ING_VID_CHECK_MASK; vc5 &= ~VC5_DROP_VTABLE_MISS; From 425f11d4cc9bd9e97e6825d9abb2c51a068ca7b5 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:01 +0200 Subject: [PATCH 486/559] net: dsa: b53: keep CPU port always tagged again The Broadcom management header does not carry the original VLAN tag state information, just the ingress port, so for untagged frames we do not know from which VLAN they originated. Therefore keep the CPU port always tagged except for VLAN 0. Fixes the following setup: $ ip link add br0 type bridge vlan_filtering 1 $ ip link set sw1p1 master br0 $ bridge vlan add dev br0 pvid untagged self $ ip link add sw1p2.10 link sw1p2 type vlan id 10 Where VID 10 would stay untagged on the CPU port. Fixes: 2c32a3d3c233 ("net: dsa: b53: Do not force CPU to be always tagged") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-3-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 62866165ad03..9d4fb54b4ced 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1135,6 +1135,11 @@ static int b53_setup(struct dsa_switch *ds) */ ds->untag_bridge_pvid = dev->tag_protocol == DSA_TAG_PROTO_NONE; + /* The switch does not tell us the original VLAN for untagged + * packets, so keep the CPU port always tagged. + */ + ds->untag_vlan_aware_bridge_pvid = true; + ret = b53_reset_switch(dev); if (ret) { dev_err(ds->dev, "failed to reset switch\n"); @@ -1545,6 +1550,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (vlan->vid == 0 && vlan->vid == b53_default_pvid(dev)) untagged = true; + if (vlan->vid > 0 && dsa_is_cpu_port(ds, port)) + untagged = false; + vl->members |= BIT(port); if (untagged && !b53_vlan_port_needs_forced_tagged(ds, port)) vl->untag |= BIT(port); From f480851981043d9bb6447ca9883ade9247b9a0ad Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:02 +0200 Subject: [PATCH 487/559] net: dsa: b53: fix clearing PVID of a port Currently the PVID of ports are only set when adding/updating VLANs with PVID set or removing VLANs, but not when clearing the PVID flag of a VLAN. E.g. the following flow $ ip link add br0 type bridge vlan_filtering 1 $ ip link set sw1p1 master bridge $ bridge vlan add dev sw1p1 vid 10 pvid untagged $ bridge vlan add dev sw1p1 vid 10 untagged Would keep the PVID set as 10, despite the flag being cleared. Fix this by checking if we need to unset the PVID on vlan updates. Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-4-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 9d4fb54b4ced..65d74c455c57 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1537,12 +1537,21 @@ int b53_vlan_add(struct dsa_switch *ds, int port, bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; struct b53_vlan *vl; + u16 old_pvid, new_pvid; int err; err = b53_vlan_prepare(ds, port, vlan); if (err) return err; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &old_pvid); + if (pvid) + new_pvid = vlan->vid; + else if (!pvid && vlan->vid == old_pvid) + new_pvid = b53_default_pvid(dev); + else + new_pvid = old_pvid; + vl = &dev->vlans[vlan->vid]; b53_get_vlan_entry(dev, vlan->vid, vl); @@ -1562,9 +1571,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, b53_set_vlan_entry(dev, vlan->vid, vl); b53_fast_age_vlan(dev, vlan->vid); - if (pvid && !dsa_is_cpu_port(ds, port)) { + if (!dsa_is_cpu_port(ds, port) && new_pvid != old_pvid) { b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), - vlan->vid); + new_pvid); b53_fast_age_vlan(dev, vlan->vid); } From 083c6b28c0cbcd83b6af1a10f2c82937129b3438 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:03 +0200 Subject: [PATCH 488/559] net: dsa: b53: fix flushing old pvid VLAN on pvid change Presumably the intention here was to flush the VLAN of the old pvid, not the added VLAN again, which we already flushed before. Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-5-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 65d74c455c57..c67c0b5fbc1b 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1574,7 +1574,7 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (!dsa_is_cpu_port(ds, port) && new_pvid != old_pvid) { b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), new_pvid); - b53_fast_age_vlan(dev, vlan->vid); + b53_fast_age_vlan(dev, old_pvid); } return 0; From a1c1901c5cc881425cc45992ab6c5418174e9e5a Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:04 +0200 Subject: [PATCH 489/559] net: dsa: b53: fix VLAN ID for untagged vlan on bridge leave The untagged default VLAN is added to the default vlan, which may be one, but we modify the VLAN 0 entry on bridge leave. Fix this to use the correct VLAN entry for the default pvid. Fixes: fea83353177a ("net: dsa: b53: Fix default VLAN ID") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-6-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index c67c0b5fbc1b..c60b552b945c 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1986,7 +1986,7 @@ EXPORT_SYMBOL(b53_br_join); void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) { struct b53_device *dev = ds->priv; - struct b53_vlan *vl = &dev->vlans[0]; + struct b53_vlan *vl; s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index; unsigned int i; u16 pvlan, reg, pvid; @@ -2012,6 +2012,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) dev->ports[port].vlan_ctl_mask = pvlan; pvid = b53_default_pvid(dev); + vl = &dev->vlans[pvid]; /* Make this port join all VLANs without VLAN entries */ if (is58xx(dev)) { From 13b152ae40495966501697693f048f47430c50fd Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:05 +0200 Subject: [PATCH 490/559] net: dsa: b53: always rejoin default untagged VLAN on bridge leave While JOIN_ALL_VLAN allows to join all VLANs, we still need to keep the default VLAN enabled so that untagged traffic stays untagged. So rejoin the default VLAN even for switches with JOIN_ALL_VLAN support. Fixes: 48aea33a77ab ("net: dsa: b53: Add JOIN_ALL_VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-7-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index c60b552b945c..4871e117f5ef 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2021,12 +2021,12 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) if (!(reg & BIT(cpu_port))) reg |= BIT(cpu_port); b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); - } else { - b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(cpu_port); - vl->untag |= BIT(port) | BIT(cpu_port); - b53_set_vlan_entry(dev, pvid, vl); } + + b53_get_vlan_entry(dev, pvid, vl); + vl->members |= BIT(port) | BIT(cpu_port); + vl->untag |= BIT(port) | BIT(cpu_port); + b53_set_vlan_entry(dev, pvid, vl); } EXPORT_SYMBOL(b53_br_leave); From 45e9d59d39503bb3e6ab4d258caea4ba6496e2dc Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:06 +0200 Subject: [PATCH 491/559] net: dsa: b53: do not allow to configure VLAN 0 Since we cannot set forwarding destinations per VLAN, we should not have a VLAN 0 configured, as it would allow untagged traffic to work across ports on VLAN aware bridges regardless if a PVID untagged VLAN exists. So remove the VLAN 0 on join, an re-add it on leave. But only do so if we have a VLAN aware bridge, as without it, untagged traffic would become tagged with VID 0 on a VLAN unaware bridge. Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-8-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 36 ++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 4871e117f5ef..0b28791cca52 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1544,6 +1544,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (err) return err; + if (vlan->vid == 0) + return 0; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &old_pvid); if (pvid) new_pvid = vlan->vid; @@ -1556,10 +1559,7 @@ int b53_vlan_add(struct dsa_switch *ds, int port, b53_get_vlan_entry(dev, vlan->vid, vl); - if (vlan->vid == 0 && vlan->vid == b53_default_pvid(dev)) - untagged = true; - - if (vlan->vid > 0 && dsa_is_cpu_port(ds, port)) + if (dsa_is_cpu_port(ds, port)) untagged = false; vl->members |= BIT(port); @@ -1589,6 +1589,9 @@ int b53_vlan_del(struct dsa_switch *ds, int port, struct b53_vlan *vl; u16 pvid; + if (vlan->vid == 0) + return 0; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid); vl = &dev->vlans[vlan->vid]; @@ -1935,8 +1938,9 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, bool *tx_fwd_offload, struct netlink_ext_ack *extack) { struct b53_device *dev = ds->priv; + struct b53_vlan *vl; s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index; - u16 pvlan, reg; + u16 pvlan, reg, pvid; unsigned int i; /* On 7278, port 7 which connects to the ASP should only receive @@ -1945,6 +1949,9 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, if (dev->chip_id == BCM7278_DEVICE_ID && port == 7) return -EINVAL; + pvid = b53_default_pvid(dev); + vl = &dev->vlans[pvid]; + /* Make this port leave the all VLANs join since we will have proper * VLAN entries from now on */ @@ -1956,6 +1963,15 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); } + if (ds->vlan_filtering) { + b53_get_vlan_entry(dev, pvid, vl); + vl->members &= ~BIT(port); + if (vl->members == BIT(cpu_port)) + vl->members &= ~BIT(cpu_port); + vl->untag = vl->members; + b53_set_vlan_entry(dev, pvid, vl); + } + b53_read16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), &pvlan); b53_for_each_port(dev, i) { @@ -2023,10 +2039,12 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); } - b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(cpu_port); - vl->untag |= BIT(port) | BIT(cpu_port); - b53_set_vlan_entry(dev, pvid, vl); + if (ds->vlan_filtering) { + b53_get_vlan_entry(dev, pvid, vl); + vl->members |= BIT(port) | BIT(cpu_port); + vl->untag |= BIT(port) | BIT(cpu_port); + b53_set_vlan_entry(dev, pvid, vl); + } } EXPORT_SYMBOL(b53_br_leave); From f089652b6b16452535dcc5cbaa6e2bb05acd3f93 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:07 +0200 Subject: [PATCH 492/559] net: dsa: b53: do not program vlans when vlan filtering is off Documentation/networking/switchdev.rst says: - with VLAN filtering turned off: the bridge is strictly VLAN unaware and its data path will process all Ethernet frames as if they are VLAN-untagged. The bridge VLAN database can still be modified, but the modifications should have no effect while VLAN filtering is turned off. This breaks if we immediately apply the VLAN configuration, so skip writing it when vlan_filtering is off. Fixes: 0ee2af4ebbe3 ("net: dsa: set configure_vlan_while_not_filtering to true by default") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-9-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 48 +++++++++++++++++++------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 0b28791cca52..ee2f1be62618 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1547,6 +1547,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (vlan->vid == 0) return 0; + if (!ds->vlan_filtering) + return 0; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &old_pvid); if (pvid) new_pvid = vlan->vid; @@ -1592,6 +1595,9 @@ int b53_vlan_del(struct dsa_switch *ds, int port, if (vlan->vid == 0) return 0; + if (!ds->vlan_filtering) + return 0; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid); vl = &dev->vlans[vlan->vid]; @@ -1952,18 +1958,20 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, pvid = b53_default_pvid(dev); vl = &dev->vlans[pvid]; - /* Make this port leave the all VLANs join since we will have proper - * VLAN entries from now on - */ - if (is58xx(dev)) { - b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); - reg &= ~BIT(port); - if ((reg & BIT(cpu_port)) == BIT(cpu_port)) - reg &= ~BIT(cpu_port); - b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); - } - if (ds->vlan_filtering) { + /* Make this port leave the all VLANs join since we will have + * proper VLAN entries from now on + */ + if (is58xx(dev)) { + b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, + ®); + reg &= ~BIT(port); + if ((reg & BIT(cpu_port)) == BIT(cpu_port)) + reg &= ~BIT(cpu_port); + b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, + reg); + } + b53_get_vlan_entry(dev, pvid, vl); vl->members &= ~BIT(port); if (vl->members == BIT(cpu_port)) @@ -2030,16 +2038,16 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) pvid = b53_default_pvid(dev); vl = &dev->vlans[pvid]; - /* Make this port join all VLANs without VLAN entries */ - if (is58xx(dev)) { - b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); - reg |= BIT(port); - if (!(reg & BIT(cpu_port))) - reg |= BIT(cpu_port); - b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); - } - if (ds->vlan_filtering) { + /* Make this port join all VLANs without VLAN entries */ + if (is58xx(dev)) { + b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); + reg |= BIT(port); + if (!(reg & BIT(cpu_port))) + reg |= BIT(cpu_port); + b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); + } + b53_get_vlan_entry(dev, pvid, vl); vl->members |= BIT(port) | BIT(cpu_port); vl->untag |= BIT(port) | BIT(cpu_port); From 2dc2bd57111582895e10f54ea380329c89873f1c Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:08 +0200 Subject: [PATCH 493/559] net: dsa: b53: fix toggling vlan_filtering To allow runtime switching between vlan aware and vlan non-aware mode, we need to properly keep track of any bridge VLAN configuration. Likewise, we need to know when we actually switch between both modes, to not have to rewrite the full VLAN table every time we update the VLANs. So keep track of the current vlan_filtering mode, and on changes, apply the appropriate VLAN configuration. Fixes: 0ee2af4ebbe3 ("net: dsa: set configure_vlan_while_not_filtering to true by default") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-10-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 110 +++++++++++++++++++++---------- drivers/net/dsa/b53/b53_priv.h | 2 + 2 files changed, 78 insertions(+), 34 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index ee2f1be62618..0a7749b416f7 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -763,6 +763,22 @@ static bool b53_vlan_port_needs_forced_tagged(struct dsa_switch *ds, int port) return dev->tag_protocol == DSA_TAG_PROTO_NONE && dsa_is_cpu_port(ds, port); } +static bool b53_vlan_port_may_join_untagged(struct dsa_switch *ds, int port) +{ + struct b53_device *dev = ds->priv; + struct dsa_port *dp; + + if (!dev->vlan_filtering) + return true; + + dp = dsa_to_port(ds, port); + + if (dsa_port_is_cpu(dp)) + return true; + + return dp->bridge == NULL; +} + int b53_configure_vlan(struct dsa_switch *ds) { struct b53_device *dev = ds->priv; @@ -781,7 +797,7 @@ int b53_configure_vlan(struct dsa_switch *ds) b53_do_vlan_op(dev, VTA_CMD_CLEAR); } - b53_enable_vlan(dev, -1, dev->vlan_enabled, ds->vlan_filtering); + b53_enable_vlan(dev, -1, dev->vlan_enabled, dev->vlan_filtering); /* Create an untagged VLAN entry for the default PVID in case * CONFIG_VLAN_8021Q is disabled and there are no calls to @@ -789,26 +805,39 @@ int b53_configure_vlan(struct dsa_switch *ds) * entry. Do this only when the tagging protocol is not * DSA_TAG_PROTO_NONE */ + v = &dev->vlans[def_vid]; b53_for_each_port(dev, i) { - v = &dev->vlans[def_vid]; - v->members |= BIT(i); - if (!b53_vlan_port_needs_forced_tagged(ds, i)) - v->untag = v->members; - b53_write16(dev, B53_VLAN_PAGE, - B53_VLAN_PORT_DEF_TAG(i), def_vid); - } - - /* Upon initial call we have not set-up any VLANs, but upon - * system resume, we need to restore all VLAN entries. - */ - for (vid = def_vid; vid < dev->num_vlans; vid++) { - v = &dev->vlans[vid]; - - if (!v->members) + if (!b53_vlan_port_may_join_untagged(ds, i)) continue; - b53_set_vlan_entry(dev, vid, v); - b53_fast_age_vlan(dev, vid); + vl.members |= BIT(i); + if (!b53_vlan_port_needs_forced_tagged(ds, i)) + vl.untag = vl.members; + b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(i), + def_vid); + } + b53_set_vlan_entry(dev, def_vid, &vl); + + if (dev->vlan_filtering) { + /* Upon initial call we have not set-up any VLANs, but upon + * system resume, we need to restore all VLAN entries. + */ + for (vid = def_vid + 1; vid < dev->num_vlans; vid++) { + v = &dev->vlans[vid]; + + if (!v->members) + continue; + + b53_set_vlan_entry(dev, vid, v); + b53_fast_age_vlan(dev, vid); + } + + b53_for_each_port(dev, i) { + if (!dsa_is_cpu_port(ds, i)) + b53_write16(dev, B53_VLAN_PAGE, + B53_VLAN_PORT_DEF_TAG(i), + dev->ports[i].pvid); + } } return 0; @@ -1127,7 +1156,9 @@ EXPORT_SYMBOL(b53_setup_devlink_resources); static int b53_setup(struct dsa_switch *ds) { struct b53_device *dev = ds->priv; + struct b53_vlan *vl; unsigned int port; + u16 pvid; int ret; /* Request bridge PVID untagged when DSA_TAG_PROTO_NONE is set @@ -1146,6 +1177,15 @@ static int b53_setup(struct dsa_switch *ds) return ret; } + /* setup default vlan for filtering mode */ + pvid = b53_default_pvid(dev); + vl = &dev->vlans[pvid]; + b53_for_each_port(dev, port) { + vl->members |= BIT(port); + if (!b53_vlan_port_needs_forced_tagged(ds, port)) + vl->untag |= BIT(port); + } + b53_reset_mib(dev); ret = b53_apply_config(dev); @@ -1499,7 +1539,10 @@ int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, { struct b53_device *dev = ds->priv; - b53_enable_vlan(dev, port, dev->vlan_enabled, vlan_filtering); + if (dev->vlan_filtering != vlan_filtering) { + dev->vlan_filtering = vlan_filtering; + b53_apply_config(dev); + } return 0; } @@ -1524,7 +1567,7 @@ static int b53_vlan_prepare(struct dsa_switch *ds, int port, if (vlan->vid >= dev->num_vlans) return -ERANGE; - b53_enable_vlan(dev, port, true, ds->vlan_filtering); + b53_enable_vlan(dev, port, true, dev->vlan_filtering); return 0; } @@ -1547,21 +1590,17 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (vlan->vid == 0) return 0; - if (!ds->vlan_filtering) - return 0; - - b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &old_pvid); + old_pvid = dev->ports[port].pvid; if (pvid) new_pvid = vlan->vid; else if (!pvid && vlan->vid == old_pvid) new_pvid = b53_default_pvid(dev); else new_pvid = old_pvid; + dev->ports[port].pvid = new_pvid; vl = &dev->vlans[vlan->vid]; - b53_get_vlan_entry(dev, vlan->vid, vl); - if (dsa_is_cpu_port(ds, port)) untagged = false; @@ -1571,6 +1610,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, else vl->untag &= ~BIT(port); + if (!dev->vlan_filtering) + return 0; + b53_set_vlan_entry(dev, vlan->vid, vl); b53_fast_age_vlan(dev, vlan->vid); @@ -1595,23 +1637,22 @@ int b53_vlan_del(struct dsa_switch *ds, int port, if (vlan->vid == 0) return 0; - if (!ds->vlan_filtering) - return 0; - - b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid); + pvid = dev->ports[port].pvid; vl = &dev->vlans[vlan->vid]; - b53_get_vlan_entry(dev, vlan->vid, vl); - vl->members &= ~BIT(port); if (pvid == vlan->vid) pvid = b53_default_pvid(dev); + dev->ports[port].pvid = pvid; if (untagged && !b53_vlan_port_needs_forced_tagged(ds, port)) vl->untag &= ~(BIT(port)); + if (!dev->vlan_filtering) + return 0; + b53_set_vlan_entry(dev, vlan->vid, vl); b53_fast_age_vlan(dev, vlan->vid); @@ -1958,7 +1999,7 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, pvid = b53_default_pvid(dev); vl = &dev->vlans[pvid]; - if (ds->vlan_filtering) { + if (dev->vlan_filtering) { /* Make this port leave the all VLANs join since we will have * proper VLAN entries from now on */ @@ -2038,7 +2079,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) pvid = b53_default_pvid(dev); vl = &dev->vlans[pvid]; - if (ds->vlan_filtering) { + if (dev->vlan_filtering) { /* Make this port join all VLANs without VLAN entries */ if (is58xx(dev)) { b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); @@ -2803,6 +2844,7 @@ struct b53_device *b53_switch_alloc(struct device *base, ds->ops = &b53_switch_ops; ds->phylink_mac_ops = &b53_phylink_mac_ops; dev->vlan_enabled = true; + dev->vlan_filtering = false; /* Let DSA handle the case were multiple bridges span the same switch * device and different VLAN awareness settings are requested, which * would be breaking filtering semantics for any of the other bridge diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 0166c37a13a7..4636e27fd1ee 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -96,6 +96,7 @@ struct b53_pcs { struct b53_port { u16 vlan_ctl_mask; + u16 pvid; struct ethtool_keee eee; }; @@ -147,6 +148,7 @@ struct b53_device { unsigned int num_vlans; struct b53_vlan *vlans; bool vlan_enabled; + bool vlan_filtering; unsigned int num_ports; struct b53_port *ports; From 9f34ad89bcf0e6df6f8b01f1bdab211493fc66d1 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:09 +0200 Subject: [PATCH 494/559] net: dsa: b53: fix learning on VLAN unaware bridges When VLAN filtering is off, we configure the switch to forward, but not learn on VLAN table misses. This effectively disables learning while not filtering. Fix this by switching to forward and learn. Setting the learning disable register will still control whether learning actually happens. Fixes: dad8d7c6452b ("net: dsa: b53: Properly account for VLAN filtering") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-11-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 0a7749b416f7..a2c0b44fc6be 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -383,7 +383,7 @@ static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S; vc5 |= VC5_DROP_VTABLE_MISS; } else { - vc4 |= VC4_ING_VID_VIO_FWD << VC4_ING_VID_CHECK_S; + vc4 |= VC4_NO_ING_VID_CHK << VC4_ING_VID_CHECK_S; vc5 &= ~VC5_DROP_VTABLE_MISS; } From 2e7179c628d3cb9aee75e412473813b099e11ed4 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:10 +0200 Subject: [PATCH 495/559] net: dsa: b53: do not set learning and unicast/multicast on up When a port gets set up, b53 disables learning and enables the port for flooding. This can undo any bridge configuration on the port. E.g. the following flow would disable learning on a port: $ ip link add br0 type bridge $ ip link set sw1p1 master br0 <- enables learning for sw1p1 $ ip link set br0 up $ ip link set sw1p1 up <- disables learning again Fix this by populating dsa_switch_ops::port_setup(), and set up initial config there. Fixes: f9b3827ee66c ("net: dsa: b53: Support setting learning on port") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-12-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 21 +++++++++++++-------- drivers/net/dsa/b53/b53_priv.h | 1 + drivers/net/dsa/bcm_sf2.c | 1 + 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index a2c0b44fc6be..9eb39cfa5fb2 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -578,6 +578,18 @@ static void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable) b53_write16(dev, B53_EEE_PAGE, B53_EEE_EN_CTRL, reg); } +int b53_setup_port(struct dsa_switch *ds, int port) +{ + struct b53_device *dev = ds->priv; + + b53_port_set_ucast_flood(dev, port, true); + b53_port_set_mcast_flood(dev, port, true); + b53_port_set_learning(dev, port, false); + + return 0; +} +EXPORT_SYMBOL(b53_setup_port); + int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { struct b53_device *dev = ds->priv; @@ -590,10 +602,6 @@ int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) cpu_port = dsa_to_port(ds, port)->cpu_dp->index; - b53_port_set_ucast_flood(dev, port, true); - b53_port_set_mcast_flood(dev, port, true); - b53_port_set_learning(dev, port, false); - if (dev->ops->irq_enable) ret = dev->ops->irq_enable(dev, port); if (ret) @@ -724,10 +732,6 @@ static void b53_enable_cpu_port(struct b53_device *dev, int port) b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), port_ctrl); b53_brcm_hdr_setup(dev->ds, port); - - b53_port_set_ucast_flood(dev, port, true); - b53_port_set_mcast_flood(dev, port, true); - b53_port_set_learning(dev, port, false); } static void b53_enable_mib(struct b53_device *dev) @@ -2387,6 +2391,7 @@ static const struct dsa_switch_ops b53_switch_ops = { .phy_read = b53_phy_read16, .phy_write = b53_phy_write16, .phylink_get_caps = b53_phylink_get_caps, + .port_setup = b53_setup_port, .port_enable = b53_enable_port, .port_disable = b53_disable_port, .support_eee = b53_support_eee, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 4636e27fd1ee..2cf3e6a81e37 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -384,6 +384,7 @@ enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mprot); void b53_mirror_del(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); +int b53_setup_port(struct dsa_switch *ds, int port); int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy); void b53_disable_port(struct dsa_switch *ds, int port); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index fa2bf3fa9019..454a8c7fd7ee 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1230,6 +1230,7 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .resume = bcm_sf2_sw_resume, .get_wol = bcm_sf2_sw_get_wol, .set_wol = bcm_sf2_sw_set_wol, + .port_setup = b53_setup_port, .port_enable = bcm_sf2_port_setup, .port_disable = bcm_sf2_port_disable, .support_eee = b53_support_eee, From df84d2fd35c6f2d6b2241e47f26e1829eab26186 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Thu, 17 Apr 2025 23:30:41 +0800 Subject: [PATCH 496/559] mailmap: add entries for Lance Yang I'm moving to @linux.dev and mapping my old addresses to it. Link: https://lkml.kernel.org/r/20250417153041.38977-1-lance.yang@linux.dev Signed-off-by: Lance Yang Signed-off-by: Andrew Morton --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 9afde79e1936..6f0a4e965d6a 100644 --- a/.mailmap +++ b/.mailmap @@ -447,6 +447,8 @@ Luca Ceresoli Luca Weiss Lukasz Luba Luo Jie +Lance Yang +Lance Yang Maciej W. Rozycki Maciej W. Rozycki Maharaja Kennadyrajan From bd1261b16d9131d79723d982d54295e7f309797a Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Mon, 14 Apr 2025 14:01:23 +0800 Subject: [PATCH 497/559] ocfs2: fix the issue with discontiguous allocation in the global_bitmap commit 4eb7b93e0310 ("ocfs2: improve write IO performance when fragmentation is high") introduced another regression. The following ocfs2-test case can trigger this issue: > discontig_runner.sh => activate_discontig_bg.sh => resv_unwritten: > ${RESV_UNWRITTEN_BIN} -f ${WORK_PLACE}/large_testfile -s 0 -l \ > $((${FILE_MAJOR_SIZE_M}*1024*1024)) In my env, test disk size (by "fdisk -l "): > 53687091200 bytes, 104857600 sectors. Above command is: > /usr/local/ocfs2-test/bin/resv_unwritten -f \ > /mnt/ocfs2/ocfs2-activate-discontig-bg-dir/large_testfile -s 0 -l \ > 53187969024 Error log: > [*] Reserve 50724M space for a LARGE file, reserve 200M space for future test. > ioctl error 28: "No space left on device" > resv allocation failed Unknown error -1 > reserve unwritten region from 0 to 53187969024. Call flow: __ocfs2_change_file_space //by ioctl OCFS2_IOC_RESVSP64 ocfs2_allocate_unwritten_extents //start:0 len:53187969024 while() + ocfs2_get_clusters //cpos:0, alloc_size:1623168 (cluster number) + ocfs2_extend_allocation + ocfs2_lock_allocators | + choose OCFS2_AC_USE_MAIN & ocfs2_cluster_group_search | + ocfs2_add_inode_data ocfs2_add_clusters_in_btree __ocfs2_claim_clusters ocfs2_claim_suballoc_bits + During the allocation of the final part of the large file (after ~47GB), no chain had the required contiguous bits_wanted. Consequently, the allocation failed. How to fix: When OCFS2 is encountering fragmented allocation, the file system should stop attempting bits_wanted contiguous allocation and instead provide the largest available contiguous free bits from the cluster groups. Link: https://lkml.kernel.org/r/20250414060125.19938-2-heming.zhao@suse.com Fixes: 4eb7b93e0310 ("ocfs2: improve write IO performance when fragmentation is high") Signed-off-by: Heming Zhao Reported-by: Gautham Ananthakrishna Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/suballoc.c | 38 ++++++++++++++++++++++++++++++++------ fs/ocfs2/suballoc.h | 1 + 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f7b483f0de2a..6ac4dcd54588 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -698,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode, ac, cl); - if (PTR_ERR(bg_bh) == -ENOSPC) + if (PTR_ERR(bg_bh) == -ENOSPC) { + ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG; bg_bh = ocfs2_block_group_alloc_discontig(handle, alloc_inode, ac, cl); + } if (IS_ERR(bg_bh)) { status = PTR_ERR(bg_bh); bg_bh = NULL; @@ -1794,6 +1796,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, { int status; u16 chain; + u32 contig_bits; u64 next_group; struct inode *alloc_inode = ac->ac_inode; struct buffer_head *group_bh = NULL; @@ -1819,10 +1822,21 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, status = -ENOSPC; /* for now, the chain search is a bit simplistic. We just use * the 1st group with any empty bits. */ - while ((status = ac->ac_group_search(alloc_inode, group_bh, - bits_wanted, min_bits, - ac->ac_max_block, - res)) == -ENOSPC) { + while (1) { + if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG) { + contig_bits = le16_to_cpu(bg->bg_contig_free_bits); + if (!contig_bits) + contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap, + le16_to_cpu(bg->bg_bits), 0); + if (bits_wanted > contig_bits && contig_bits >= min_bits) + bits_wanted = contig_bits; + } + + status = ac->ac_group_search(alloc_inode, group_bh, + bits_wanted, min_bits, + ac->ac_max_block, res); + if (status != -ENOSPC) + break; if (!bg->bg_next_group) break; @@ -1982,6 +1996,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; +search: status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); if (!status) { @@ -2022,6 +2037,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, } } + /* Chains can't supply the bits_wanted contiguous space. + * We should switch to using every single bit when allocating + * from the global bitmap. */ + if (i == le16_to_cpu(cl->cl_next_free_rec) && + status == -ENOSPC && ac->ac_which == OCFS2_AC_USE_MAIN) { + ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG; + ac->ac_chain = victim; + goto search; + } + set_hint: if (status != -ENOSPC) { /* If the next search of this group is not likely to @@ -2365,7 +2390,8 @@ int __ocfs2_claim_clusters(handle_t *handle, BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL - && ac->ac_which != OCFS2_AC_USE_MAIN); + && ac->ac_which != OCFS2_AC_USE_MAIN + && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG); if (ac->ac_which == OCFS2_AC_USE_LOCAL) { WARN_ON(min_clusters > 1); diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index b481b834857d..bcf2ed4a8631 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -29,6 +29,7 @@ struct ocfs2_alloc_context { #define OCFS2_AC_USE_MAIN 2 #define OCFS2_AC_USE_INODE 3 #define OCFS2_AC_USE_META 4 +#define OCFS2_AC_USE_MAIN_DISCONTIG 5 u32 ac_which; /* these are used by the chain search */ From 00a241f528427b63c415a410293b86e66098888e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 16 Apr 2025 18:09:50 -0700 Subject: [PATCH 498/559] x86: disable image size check for test builds 64-bit allyesconfig builds fail with x86_64-linux-ld: kernel image bigger than KERNEL_IMAGE_SIZE Bisect points to commit 6f110a5e4f99 ("Disable SLUB_TINY for build testing") as the responsible commit. Reverting that patch does indeed fix the problem. Further analysis shows that disabling SLUB_TINY enables KASAN, and that KASAN is responsible for the image size increase. Solve the build problem by disabling the image size check for test builds. [akpm@linux-foundation.org: add comment, fix nearby typo (sink->sync)] [akpm@linux-foundation.org: fix comment snafu Link: https://lore.kernel.org/oe-kbuild-all/202504191813.4r9H6Glt-lkp@intel.com/ Link: https://lkml.kernel.org/r/20250417010950.2203847-1-linux@roeck-us.net Fixes: 6f110a5e4f99 ("Disable SLUB_TINY for build testing") Signed-off-by: Guenter Roeck Cc: Linus Torvalds Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Borislav Betkov Cc: Dmitriy Vyukov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleinxer Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton --- arch/x86/kernel/vmlinux.lds.S | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index ccdc45e5b759..aa4d0221583c 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -466,10 +466,18 @@ SECTIONS } /* - * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: + * COMPILE_TEST kernels can be large - CONFIG_KASAN, for example, can cause + * this. Let's assume that nobody will be running a COMPILE_TEST kernel and + * let's assert that fuller build coverage is more valuable than being able to + * run a COMPILE_TEST kernel. + */ +#ifndef CONFIG_COMPILE_TEST +/* + * The ASSERT() sync to . is intentional, for binutils 2.14 compatibility: */ . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), "kernel image bigger than KERNEL_IMAGE_SIZE"); +#endif /* needed for Clang - see arch/x86/entry/entry.S */ PROVIDE(__ref_stack_chk_guard = __stack_chk_guard); From e81224f0ba22e13038381d41a8356ab256d56f23 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 18 Apr 2025 16:00:52 +0100 Subject: [PATCH 499/559] MAINTAINERS: add reverse mapping section Separate out the reverse mapping part of memory management and assign appropriate maintainers and reviewers. David has long been invovled in work with the reverse mapping and continues to do so, so is well suited to maintain this area of the kernel. I have a lot of experience working with the anonymous reverse mapping and continue to work in this area, and also have good knowledge of the walking code and code related to VMAs. This helps people identify who to ask for help, and also additionally makes life easier in review. Link: https://lkml.kernel.org/r/20250418150052.299220-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Liam R. Howlett Acked-by: Harry Yoo Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton --- MAINTAINERS | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 69511c3b2b76..c90842b4239e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15505,7 +15505,6 @@ F: include/linux/mm_*.h F: include/linux/mmzone.h F: include/linux/mmu_notifier.h F: include/linux/pagewalk.h -F: include/linux/rmap.h F: include/trace/events/ksm.h F: mm/ F: tools/mm/ @@ -15545,6 +15544,19 @@ F: mm/page_alloc.c F: include/linux/gfp.h F: include/linux/compaction.h +MEMORY MANAGEMENT - RMAP (REVERSE MAPPING) +M: Andrew Morton +M: David Hildenbrand +M: Lorenzo Stoakes +R: Rik van Riel +R: Liam R. Howlett +R: Vlastimil Babka +R: Harry Yoo +L: linux-mm@kvack.org +S: Maintained +F: include/linux/rmap.h +F: mm/rmap.c + MEMORY MANAGEMENT - SECRETMEM M: Andrew Morton M: Mike Rapoport From be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7 Mon Sep 17 00:00:00 2001 From: Gavin Guo Date: Mon, 21 Apr 2025 19:35:36 +0800 Subject: [PATCH 500/559] mm/huge_memory: fix dereferencing invalid pmd migration entry When migrating a THP, concurrent access to the PMD migration entry during a deferred split scan can lead to an invalid address access, as illustrated below. To prevent this invalid access, it is necessary to check the PMD migration entry and return early. In this context, there is no need to use pmd_to_swp_entry and pfn_swap_entry_to_page to verify the equality of the target folio. Since the PMD migration entry is locked, it cannot be served as the target. Mailing list discussion and explanation from Hugh Dickins: "An anon_vma lookup points to a location which may contain the folio of interest, but might instead contain another folio: and weeding out those other folios is precisely what the "folio != pmd_folio((*pmd)" check (and the "risk of replacing the wrong folio" comment a few lines above it) is for." BUG: unable to handle page fault for address: ffffea60001db008 CPU: 0 UID: 0 PID: 2199114 Comm: tee Not tainted 6.14.0+ #4 NONE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:split_huge_pmd_locked+0x3b5/0x2b60 Call Trace: try_to_migrate_one+0x28c/0x3730 rmap_walk_anon+0x4f6/0x770 unmap_folio+0x196/0x1f0 split_huge_page_to_list_to_order+0x9f6/0x1560 deferred_split_scan+0xac5/0x12a0 shrinker_debugfs_scan_write+0x376/0x470 full_proxy_write+0x15c/0x220 vfs_write+0x2fc/0xcb0 ksys_write+0x146/0x250 do_syscall_64+0x6a/0x120 entry_SYSCALL_64_after_hwframe+0x76/0x7e The bug is found by syzkaller on an internal kernel, then confirmed on upstream. Link: https://lkml.kernel.org/r/20250421113536.3682201-1-gavinguo@igalia.com Link: https://lore.kernel.org/all/20250414072737.1698513-1-gavinguo@igalia.com/ Link: https://lore.kernel.org/all/20250418085802.2973519-1-gavinguo@igalia.com/ Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path") Signed-off-by: Gavin Guo Acked-by: David Hildenbrand Acked-by: Hugh Dickins Acked-by: Zi Yan Reviewed-by: Gavin Shan Cc: Florent Revest Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2a47682d1ab7..47d76d03ce30 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3075,6 +3075,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, bool freeze, struct folio *folio) { + bool pmd_migration = is_pmd_migration_entry(*pmd); + VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio)); VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE)); VM_WARN_ON_ONCE(folio && !folio_test_locked(folio)); @@ -3085,9 +3087,12 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, * require a folio to check the PMD against. Otherwise, there * is a risk of replacing the wrong folio. */ - if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || - is_pmd_migration_entry(*pmd)) { - if (folio && folio != pmd_folio(*pmd)) + if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || pmd_migration) { + /* + * Do not apply pmd_folio() to a migration entry; and folio lock + * guarantees that it must be of the wrong folio anyway. + */ + if (folio && (pmd_migration || folio != pmd_folio(*pmd))) return; __split_huge_pmd_locked(vma, pmd, address, freeze); } From 31d4cd4eb2f8d9b87ebfa6a5e443a59e3b3d7b8c Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Fri, 11 Apr 2025 11:31:24 -0500 Subject: [PATCH 501/559] ocfs2: fix panic in failed foilio allocation commit 7e119cff9d0a ("ocfs2: convert w_pages to w_folios") and commit 9a5e08652dc4b ("ocfs2: use an array of folios instead of an array of pages") save -ENOMEM in the folio array upon allocation failure and call the folio array free code. The folio array free code expects either valid folio pointers or NULL. Finding the -ENOMEM will result in a panic. Fix by NULLing the error folio entry. Link: https://lkml.kernel.org/r/c879a52b-835c-4fa0-902b-8b2e9196dcbd@oracle.com Fixes: 7e119cff9d0a ("ocfs2: convert w_pages to w_folios") Fixes: 9a5e08652dc4b ("ocfs2: use an array of folios instead of an array of pages") Signed-off-by: Mark Tinguely Reviewed-by: Matthew Wilcox (Oracle) Cc: Changwei Ge Cc: Joel Becker Cc: Junxiao Bi Cc: Mark Fasheh Cc: Nathan Chancellor Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index b8ac85b548c7..821cb7874685 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6918,6 +6918,7 @@ static int ocfs2_grab_folios(struct inode *inode, loff_t start, loff_t end, if (IS_ERR(folios[numfolios])) { ret = PTR_ERR(folios[numfolios]); mlog_errno(ret); + folios[numfolios] = NULL; goto out; } From a47694ecb8bcf2d60d665eaade913f4f59956c44 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 23 Apr 2025 13:30:42 +0100 Subject: [PATCH 502/559] MAINTAINERS: add core mm section In furtherance of ongoing efforts to ensure people are aware of who de-facto maintains/has an interest in specific parts of mm, as well trying to avoid get_maintainers.pl listing only Andrew and the mailing list for mm files - establish a 'core' memory management section establishing David as co-maintainer alongside Andrew (thanks David for volunteering!) along with a number of relevant reviewers. We try to keep things as fine-grained as possible, so we place only obviously 'general' mm things here. For files which are specific to a particular part of mm, we prefer new entries. Link: https://lkml.kernel.org/r/20250423123042.59082-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Liam R. Howlett Acked-by: David Hildenbrand Cc: Lorenzo Stoakes Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- MAINTAINERS | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c90842b4239e..5b5d07abdf32 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15494,14 +15494,11 @@ F: Documentation/mm/ F: include/linux/gfp.h F: include/linux/gfp_types.h F: include/linux/memfd.h -F: include/linux/memory.h F: include/linux/memory_hotplug.h F: include/linux/memory-tiers.h F: include/linux/mempolicy.h F: include/linux/mempool.h F: include/linux/memremap.h -F: include/linux/mm.h -F: include/linux/mm_*.h F: include/linux/mmzone.h F: include/linux/mmu_notifier.h F: include/linux/pagewalk.h @@ -15511,6 +15508,31 @@ F: tools/mm/ F: tools/testing/selftests/mm/ N: include/linux/page[-_]* +MEMORY MANAGEMENT - CORE +M: Andrew Morton +M: David Hildenbrand +R: Lorenzo Stoakes +R: Liam R. Howlett +R: Vlastimil Babka +R: Mike Rapoport +R: Suren Baghdasaryan +R: Michal Hocko +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: include/linux/memory.h +F: include/linux/mm.h +F: include/linux/mm_*.h +F: include/linux/mmdebug.h +F: include/linux/pagewalk.h +F: mm/Kconfig +F: mm/debug.c +F: mm/init-mm.c +F: mm/memory.c +F: mm/pagewalk.c +F: mm/util.c + MEMORY MANAGEMENT - EXECMEM M: Andrew Morton M: Mike Rapoport From ab00ddd802f80e31fc9639c652d736fe3913feae Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 23 Apr 2025 18:36:45 +0800 Subject: [PATCH 503/559] selftests/mm: compaction_test: support platform with huge mount of memory When running mm selftest to verify mm patches, 'compaction_test' case failed on an x86 server with 1TB memory. And the root cause is that it has too much free memory than what the test supports. The test case tries to allocate 100000 huge pages, which is about 200 GB for that x86 server, and when it succeeds, it expects it's large than 1/3 of 80% of the free memory in system. This logic only works for platform with 750 GB ( 200 / (1/3) / 80% ) or less free memory, and may raise false alarm for others. Fix it by changing the fixed page number to self-adjustable number according to the real number of free memory. Link: https://lkml.kernel.org/r/20250423103645.2758-1-feng.tang@linux.alibaba.com Fixes: bd67d5c15cc1 ("Test compaction of mlocked memory") Signed-off-by: Feng Tang Acked-by: Dev Jain Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Shuah Khan Cc: Sri Jayaramappa Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/compaction_test.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index 2c3a0eb6b22d..9bc4591c7b16 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -90,6 +90,8 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, int compaction_index = 0; char nr_hugepages[20] = {0}; char init_nr_hugepages[24] = {0}; + char target_nr_hugepages[24] = {0}; + int slen; snprintf(init_nr_hugepages, sizeof(init_nr_hugepages), "%lu", initial_nr_hugepages); @@ -106,11 +108,18 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, goto out; } - /* Request a large number of huge pages. The Kernel will allocate - as much as it can */ - if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { - ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n", - strerror(errno)); + /* + * Request huge pages for about half of the free memory. The Kernel + * will allocate as much as it can, and we expect it will get at least 1/3 + */ + nr_hugepages_ul = mem_free / hugepage_size / 2; + snprintf(target_nr_hugepages, sizeof(target_nr_hugepages), + "%lu", nr_hugepages_ul); + + slen = strlen(target_nr_hugepages); + if (write(fd, target_nr_hugepages, slen) != slen) { + ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n", + nr_hugepages_ul, strerror(errno)); goto close_fd; } From 95567729173e62e0e60a1f8ad9eb2e1320a8ccac Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 24 Apr 2025 17:57:28 -0400 Subject: [PATCH 504/559] mm/userfaultfd: fix uninitialized output field for -EAGAIN race While discussing some userfaultfd relevant issues recently, Andrea noticed a potential ABI breakage with -EAGAIN on almost all userfaultfd ioctl()s. Quote from Andrea, explaining how -EAGAIN was processed, and how this should fix it (taking example of UFFDIO_COPY ioctl): The "mmap_changing" and "stale pmd" conditions are already reported as -EAGAIN written in the copy field, this does not change it. This change removes the subnormal case that left copy.copy uninitialized and required apps to explicitly set the copy field to get deterministic behavior (which is a requirement contrary to the documentation in both the manpage and source code). In turn there's no alteration to backwards compatibility as result of this change because userland will find the copy field consistently set to -EAGAIN, and not anymore sometime -EAGAIN and sometime uninitialized. Even then the change only can make a difference to non cooperative users of userfaultfd, so when UFFD_FEATURE_EVENT_* is enabled, which is not true for the vast majority of apps using userfaultfd or this unintended uninitialized field may have been noticed sooner. Meanwhile, since this bug existed for years, it also almost affects all ioctl()s that was introduced later. Besides UFFDIO_ZEROPAGE, these also get affected in the same way: - UFFDIO_CONTINUE - UFFDIO_POISON - UFFDIO_MOVE This patch should have fixed all of them. Link: https://lkml.kernel.org/r/20250424215729.194656-2-peterx@redhat.com Fixes: df2cc96e7701 ("userfaultfd: prevent non-cooperative events vs mcopy_atomic races") Fixes: f619147104c8 ("userfaultfd: add UFFDIO_CONTINUE ioctl") Fixes: fc71884a5f59 ("mm: userfaultfd: add new UFFDIO_POISON ioctl") Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Peter Xu Reported-by: Andrea Arcangeli Suggested-by: Andrea Arcangeli Reviewed-by: David Hildenbrand Cc: Mike Rapoport Cc: Axel Rasmussen Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- fs/userfaultfd.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index d80f94346199..22f4bf956ba1 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1585,8 +1585,11 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, user_uffdio_copy = (struct uffdio_copy __user *) arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_copy->copy))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_copy, user_uffdio_copy, @@ -1641,8 +1644,11 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_zeropage, user_uffdio_zeropage, @@ -1744,8 +1750,11 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) user_uffdio_continue = (struct uffdio_continue __user *)arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_continue->mapped))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_continue, user_uffdio_continue, @@ -1801,8 +1810,11 @@ static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long user_uffdio_poison = (struct uffdio_poison __user *)arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_poison->updated))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_poison, user_uffdio_poison, @@ -1870,8 +1882,12 @@ static int userfaultfd_move(struct userfaultfd_ctx *ctx, user_uffdio_move = (struct uffdio_move __user *) arg; - if (atomic_read(&ctx->mmap_changing)) - return -EAGAIN; + ret = -EAGAIN; + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_move->move))) + return -EFAULT; + goto out; + } if (copy_from_user(&uffdio_move, user_uffdio_move, /* don't copy "move" last field */ From 09fc97b3abe9e72c799a1c757acc47e746844619 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 24 Apr 2025 12:16:32 +0100 Subject: [PATCH 505/559] MAINTAINERS: add mm THP section As part of the ongoing efforts to sub-divide memory management maintainership and reviewership, establish a section for Transparent Huge Page support and add appropriate maintainers and reviewers. [lorenzo.stoakes@oracle.com: add Dev Jain as THP reviewer] Link: https://lkml.kernel.org/r/327e6f2f-0f0f-48af-9ca2-3f8cadf0d8bf@lucifer.local Link: https://lkml.kernel.org/r/20250424111632.103637-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: David Hildenbrand Reviewed-by: Ryan Roberts Reviewed-by: Baolin Wang Acked-by: Zi Yan Cc: Dev Jain Cc: Liam Howlett Cc: Mariano Pache Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- MAINTAINERS | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5b5d07abdf32..f62b266630cf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15587,6 +15587,30 @@ S: Maintained F: include/linux/secretmem.h F: mm/secretmem.c +MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE) +M: Andrew Morton +M: David Hildenbrand +R: Zi Yan +R: Baolin Wang +R: Lorenzo Stoakes +R: Liam R. Howlett +R: Nico Pache +R: Ryan Roberts +R: Dev Jain +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: Documentation/admin-guide/mm/transhuge.rst +F: include/linux/huge_mm.h +F: include/linux/khugepaged.h +F: include/trace/events/huge_memory.h +F: mm/huge_memory.c +F: mm/khugepaged.c +F: tools/testing/selftests/mm/khugepaged.c +F: tools/testing/selftests/mm/split_huge_page_test.c +F: tools/testing/selftests/mm/transhuge-stress.c + MEMORY MANAGEMENT - USERFAULTFD M: Andrew Morton R: Peter Xu From 80fbee76ebbdf92acd9f293178850bf0b103a6b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 24 Apr 2025 08:02:51 +0200 Subject: [PATCH 506/559] mailmap: map Uwe's BayLibre addresses to a single one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When I started working for BayLibre I wasn't aware that the mailserver rewrote the sender address and so a few commits entered kernel history with a working but unexpected address. Map the unexpected to the intended one. This also makes the author of those commits (e.g. 32b4f1a4f07f ("pwm: jz4740: Another few conversions to regmap_{set,clear}_bits()")) match the address used in the sign-off line. Link: https://lkml.kernel.org/r/20250424060250.3085683-2-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 6f0a4e965d6a..4f661e434eaa 100644 --- a/.mailmap +++ b/.mailmap @@ -751,6 +751,7 @@ Tvrtko Ursulin Tycho Andersen Tzung-Bi Shih Uwe Kleine-König +Uwe Kleine-König Uwe Kleine-König Uwe Kleine-König Uwe Kleine-König From c0fb83088f0cc4ee4706e0495ee8b06f49daa716 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 24 Apr 2025 15:45:11 +0200 Subject: [PATCH 507/559] ocfs2: switch osb->disable_recovery to enum Patch series "ocfs2: Fix deadlocks in quota recovery", v3. This implements another approach to fixing quota recovery deadlocks. We avoid grabbing sb->s_umount semaphore from ocfs2_finish_quota_recovery() and instead stop quota recovery early in ocfs2_dismount_volume(). This patch (of 3): We will need more recovery states than just pure enable / disable to fix deadlocks with quota recovery. Switch osb->disable_recovery to enum. Link: https://lkml.kernel.org/r/20250424134301.1392-1-jack@suse.cz Link: https://lkml.kernel.org/r/20250424134515.18933-4-jack@suse.cz Fixes: 5f530de63cfc ("ocfs2: Use s_umount for quota recovery protection") Signed-off-by: Jan Kara Reviewed-by: Heming Zhao Tested-by: Heming Zhao Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Murad Masimov Cc: Shichangkuo Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/journal.c | 14 ++++++++------ fs/ocfs2/ocfs2.h | 7 ++++++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index c7a9729dc9d0..e79f10cc43d9 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb) struct ocfs2_recovery_map *rm; mutex_init(&osb->recovery_lock); - osb->disable_recovery = 0; + osb->recovery_state = OCFS2_REC_ENABLED; osb->recovery_thread_task = NULL; init_waitqueue_head(&osb->recovery_event); @@ -206,7 +206,7 @@ void ocfs2_recovery_exit(struct ocfs2_super *osb) /* disable any new recovery threads and wait for any currently * running ones to exit. Do this before setting the vol_state. */ mutex_lock(&osb->recovery_lock); - osb->disable_recovery = 1; + osb->recovery_state = OCFS2_REC_DISABLED; mutex_unlock(&osb->recovery_lock); wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); @@ -1582,14 +1582,16 @@ bail: void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) { + int was_set = -1; + mutex_lock(&osb->recovery_lock); + if (osb->recovery_state < OCFS2_REC_DISABLED) + was_set = ocfs2_recovery_map_set(osb, node_num); trace_ocfs2_recovery_thread(node_num, osb->node_num, - osb->disable_recovery, osb->recovery_thread_task, - osb->disable_recovery ? - -1 : ocfs2_recovery_map_set(osb, node_num)); + osb->recovery_state, osb->recovery_thread_task, was_set); - if (osb->disable_recovery) + if (osb->recovery_state == OCFS2_REC_DISABLED) goto out; if (osb->recovery_thread_task) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 51c52768132d..e713361939f0 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -308,6 +308,11 @@ enum ocfs2_journal_trigger_type { void ocfs2_initialize_journal_triggers(struct super_block *sb, struct ocfs2_triggers triggers[]); +enum ocfs2_recovery_state { + OCFS2_REC_ENABLED = 0, + OCFS2_REC_DISABLED, +}; + struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; @@ -370,7 +375,7 @@ struct ocfs2_super struct ocfs2_recovery_map *recovery_map; struct ocfs2_replay_map *replay_map; struct task_struct *recovery_thread_task; - int disable_recovery; + enum ocfs2_recovery_state recovery_state; wait_queue_head_t checkpoint_event; struct ocfs2_journal *journal; unsigned long osb_commit_interval; From 8f947e0fd595951460f5a6e1ac29baa82fa02eab Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 24 Apr 2025 15:45:12 +0200 Subject: [PATCH 508/559] ocfs2: implement handshaking with ocfs2 recovery thread We will need ocfs2 recovery thread to acknowledge transitions of recovery_state when disabling particular types of recovery. This is similar to what currently happens when disabling recovery completely, just more general. Implement the handshake and use it for exit from recovery. Link: https://lkml.kernel.org/r/20250424134515.18933-5-jack@suse.cz Fixes: 5f530de63cfc ("ocfs2: Use s_umount for quota recovery protection") Signed-off-by: Jan Kara Reviewed-by: Heming Zhao Tested-by: Heming Zhao Acked-by: Joseph Qi Cc: Changwei Ge Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Cc: Murad Masimov Cc: Shichangkuo Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/journal.c | 52 +++++++++++++++++++++++++++++++--------------- fs/ocfs2/ocfs2.h | 4 ++++ 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index e79f10cc43d9..2987df396ad8 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -190,31 +190,48 @@ int ocfs2_recovery_init(struct ocfs2_super *osb) return 0; } -/* we can't grab the goofy sem lock from inside wait_event, so we use - * memory barriers to make sure that we'll see the null task before - * being woken up */ static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) { - mb(); return osb->recovery_thread_task != NULL; } +static void ocfs2_recovery_disable(struct ocfs2_super *osb, + enum ocfs2_recovery_state state) +{ + mutex_lock(&osb->recovery_lock); + /* + * If recovery thread is not running, we can directly transition to + * final state. + */ + if (!ocfs2_recovery_thread_running(osb)) { + osb->recovery_state = state + 1; + goto out_lock; + } + osb->recovery_state = state; + /* Wait for recovery thread to acknowledge state transition */ + wait_event_cmd(osb->recovery_event, + !ocfs2_recovery_thread_running(osb) || + osb->recovery_state >= state + 1, + mutex_unlock(&osb->recovery_lock), + mutex_lock(&osb->recovery_lock)); +out_lock: + mutex_unlock(&osb->recovery_lock); + + /* + * At this point we know that no more recovery work can be queued so + * wait for any recovery completion work to complete. + */ + if (osb->ocfs2_wq) + flush_workqueue(osb->ocfs2_wq); +} + void ocfs2_recovery_exit(struct ocfs2_super *osb) { struct ocfs2_recovery_map *rm; /* disable any new recovery threads and wait for any currently * running ones to exit. Do this before setting the vol_state. */ - mutex_lock(&osb->recovery_lock); - osb->recovery_state = OCFS2_REC_DISABLED; - mutex_unlock(&osb->recovery_lock); - wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); - - /* At this point, we know that no more recovery threads can be - * launched, so wait for any recovery completion work to - * complete. */ - if (osb->ocfs2_wq) - flush_workqueue(osb->ocfs2_wq); + ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE); /* * Now that recovery is shut down, and the osb is about to be @@ -1569,7 +1586,8 @@ bail: ocfs2_free_replay_slots(osb); osb->recovery_thread_task = NULL; - mb(); /* sync with ocfs2_recovery_thread_running */ + if (osb->recovery_state == OCFS2_REC_WANT_DISABLE) + osb->recovery_state = OCFS2_REC_DISABLED; wake_up(&osb->recovery_event); mutex_unlock(&osb->recovery_lock); @@ -1585,13 +1603,13 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) int was_set = -1; mutex_lock(&osb->recovery_lock); - if (osb->recovery_state < OCFS2_REC_DISABLED) + if (osb->recovery_state < OCFS2_REC_WANT_DISABLE) was_set = ocfs2_recovery_map_set(osb, node_num); trace_ocfs2_recovery_thread(node_num, osb->node_num, osb->recovery_state, osb->recovery_thread_task, was_set); - if (osb->recovery_state == OCFS2_REC_DISABLED) + if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE) goto out; if (osb->recovery_thread_task) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index e713361939f0..70b2d5c8c228 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -310,6 +310,10 @@ void ocfs2_initialize_journal_triggers(struct super_block *sb, enum ocfs2_recovery_state { OCFS2_REC_ENABLED = 0, + OCFS2_REC_WANT_DISABLE, + /* + * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work + */ OCFS2_REC_DISABLED, }; From fcaf3b2683b05a9684acdebda706a12025a6927a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 24 Apr 2025 15:45:13 +0200 Subject: [PATCH 509/559] ocfs2: stop quota recovery before disabling quotas Currently quota recovery is synchronized with unmount using sb->s_umount semaphore. That is however prone to deadlocks because flush_workqueue(osb->ocfs2_wq) called from umount code can wait for quota recovery to complete while ocfs2_finish_quota_recovery() waits for sb->s_umount semaphore. Grabbing of sb->s_umount semaphore in ocfs2_finish_quota_recovery() is only needed to protect that function from disabling of quotas from ocfs2_dismount_volume(). Handle this problem by disabling quota recovery early during unmount in ocfs2_dismount_volume() instead so that we can drop acquisition of sb->s_umount from ocfs2_finish_quota_recovery(). Link: https://lkml.kernel.org/r/20250424134515.18933-6-jack@suse.cz Fixes: 5f530de63cfc ("ocfs2: Use s_umount for quota recovery protection") Signed-off-by: Jan Kara Reported-by: Shichangkuo Reported-by: Murad Masimov Reviewed-by: Heming Zhao Tested-by: Heming Zhao Acked-by: Joseph Qi Cc: Changwei Ge Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/journal.c | 20 ++++++++++++++++++-- fs/ocfs2/journal.h | 1 + fs/ocfs2/ocfs2.h | 6 ++++++ fs/ocfs2/quota_local.c | 9 ++------- fs/ocfs2/super.c | 3 +++ 5 files changed, 30 insertions(+), 9 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 2987df396ad8..e5f58ff2175f 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -225,6 +225,11 @@ out_lock: flush_workqueue(osb->ocfs2_wq); } +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb) +{ + ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE); +} + void ocfs2_recovery_exit(struct ocfs2_super *osb) { struct ocfs2_recovery_map *rm; @@ -1489,6 +1494,18 @@ static int __ocfs2_recovery_thread(void *arg) } } restart: + if (quota_enabled) { + mutex_lock(&osb->recovery_lock); + /* Confirm that recovery thread will no longer recover quotas */ + if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) { + osb->recovery_state = OCFS2_REC_QUOTA_DISABLED; + wake_up(&osb->recovery_event); + } + if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED) + quota_enabled = 0; + mutex_unlock(&osb->recovery_lock); + } + status = ocfs2_super_lock(osb, 1); if (status < 0) { mlog_errno(status); @@ -1592,8 +1609,7 @@ bail: mutex_unlock(&osb->recovery_lock); - if (quota_enabled) - kfree(rm_quota); + kfree(rm_quota); return status; } diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index e3c3a35dc5e0..6397170f302f 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb); int ocfs2_recovery_init(struct ocfs2_super *osb); void ocfs2_recovery_exit(struct ocfs2_super *osb); +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb); int ocfs2_compute_replay_slots(struct ocfs2_super *osb); void ocfs2_free_replay_slots(struct ocfs2_super *osb); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 70b2d5c8c228..6aaa94c554c1 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -310,6 +310,12 @@ void ocfs2_initialize_journal_triggers(struct super_block *sb, enum ocfs2_recovery_state { OCFS2_REC_ENABLED = 0, + OCFS2_REC_QUOTA_WANT_DISABLE, + /* + * Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for + * ocfs2_recovery_disable_quota() to work. + */ + OCFS2_REC_QUOTA_DISABLED, OCFS2_REC_WANT_DISABLE, /* * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 2956d888c131..e272429da3db 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -453,8 +453,7 @@ out: /* Sync changes in local quota file into global quota file and * reinitialize local quota file. - * The function expects local quota file to be already locked and - * s_umount locked in shared mode. */ + * The function expects local quota file to be already locked. */ static int ocfs2_recover_local_quota_file(struct inode *lqinode, int type, struct ocfs2_quota_recovery *rec) @@ -588,7 +587,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, { unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, LOCAL_GROUP_QUOTA_SYSTEM_INODE }; - struct super_block *sb = osb->sb; struct ocfs2_local_disk_dqinfo *ldinfo; struct buffer_head *bh; handle_t *handle; @@ -600,7 +598,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for " "slot %u\n", osb->dev_str, slot_num); - down_read(&sb->s_umount); for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (list_empty(&(rec->r_list[type]))) continue; @@ -677,7 +674,6 @@ out_put: break; } out: - up_read(&sb->s_umount); kfree(rec); return status; } @@ -843,8 +839,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk); /* - * s_umount held in exclusive mode protects us against racing with - * recovery thread... + * ocfs2_dismount_volume() has already aborted quota recovery... */ if (oinfo->dqi_rec) { ocfs2_free_quota_recovery(oinfo->dqi_rec); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 8bb5022f3082..3d2533950bae 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1812,6 +1812,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) /* Orphan scan should be stopped as early as possible */ ocfs2_orphan_scan_stop(osb); + /* Stop quota recovery so that we can disable quotas */ + ocfs2_recovery_disable_quota(osb); + ocfs2_disable_quotas(osb); /* All dquots should be freed by now */ From a8efadda8649506e80d256cc09656acc0783df2b Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Apr 2025 17:24:36 +0100 Subject: [PATCH 510/559] tools/testing/selftests: fix guard region test tmpfs assumption The current implementation of the guard region tests assume that /tmp is mounted as tmpfs, that is shmem. This isn't always the case, and at least one instance of a spurious test failure has been reported as a result. This assumption is unsafe, rushed and silly - and easily remedied by simply using memfd, so do so. We also have to fixup the readonly_file test to explicitly only be applicable to file-backed cases. Link: https://lkml.kernel.org/r/20250425162436.564002-1-lorenzo.stoakes@oracle.com Fixes: 272f37d3e99a ("tools/selftests: expand all guard region tests to file-backed") Signed-off-by: Lorenzo Stoakes Reported-by: Ryan Roberts Closes: https://lore.kernel.org/linux-mm/a2d2766b-0ab4-437b-951a-8595a7506fe9@arm.com/ Reviewed-by: Ryan Roberts Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/guard-regions.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index b3d0e2771096..eba43ead13ae 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -271,12 +271,16 @@ FIXTURE_SETUP(guard_regions) self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); setup_sighandler(); - if (variant->backing == ANON_BACKED) + switch (variant->backing) { + case ANON_BACKED: return; - - self->fd = open_file( - variant->backing == SHMEM_BACKED ? "/tmp/" : "", - self->path); + case LOCAL_FILE_BACKED: + self->fd = open_file("", self->path); + break; + case SHMEM_BACKED: + self->fd = memfd_create(self->path, 0); + break; + } /* We truncate file to at least 100 pages, tests can modify as needed. */ ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0); @@ -1696,7 +1700,7 @@ TEST_F(guard_regions, readonly_file) char *ptr; int i; - if (variant->backing == ANON_BACKED) + if (variant->backing != LOCAL_FILE_BACKED) SKIP(return, "Read-only test specific to file-backed"); /* Map shared so we can populate with pattern, populate it, unmap. */ From a0309faf1cb0622cac7c820150b7abf2024acff5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Apr 2025 17:11:07 -0700 Subject: [PATCH 511/559] mm: vmalloc: support more granular vrealloc() sizing Introduce struct vm_struct::requested_size so that the requested (re)allocation size is retained separately from the allocated area size. This means that KASAN will correctly poison the correct spans of requested bytes. This also means we can support growing the usable portion of an allocation that can already be supported by the existing area's existing allocation. Link: https://lkml.kernel.org/r/20250426001105.it.679-kees@kernel.org Fixes: 3ddc2fefe6f3 ("mm: vmalloc: implement vrealloc()") Signed-off-by: Kees Cook Reported-by: Erhard Furtner Closes: https://lore.kernel.org/all/20250408192503.6149a816@outsider.home/ Reviewed-by: Danilo Krummrich Cc: Michal Hocko Cc: "Uladzislau Rezki (Sony)" Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/vmalloc.h | 1 + mm/vmalloc.c | 31 ++++++++++++++++++++++++------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 31e9ffd936e3..5ca8d4dd149d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -61,6 +61,7 @@ struct vm_struct { unsigned int nr_pages; phys_addr_t phys_addr; const void *caller; + unsigned long requested_size; }; struct vmap_area { diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 3ed720a787ec..2d7511654831 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1940,7 +1940,7 @@ static inline void setup_vmalloc_vm(struct vm_struct *vm, { vm->flags = flags; vm->addr = (void *)va->va_start; - vm->size = va_size(va); + vm->size = vm->requested_size = va_size(va); vm->caller = caller; va->vm = vm; } @@ -3133,6 +3133,7 @@ struct vm_struct *__get_vm_area_node(unsigned long size, area->flags = flags; area->caller = caller; + area->requested_size = requested_size; va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0, area); if (IS_ERR(va)) { @@ -4063,6 +4064,8 @@ EXPORT_SYMBOL(vzalloc_node_noprof); */ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) { + struct vm_struct *vm = NULL; + size_t alloced_size = 0; size_t old_size = 0; void *n; @@ -4072,15 +4075,17 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) } if (p) { - struct vm_struct *vm; - vm = find_vm_area(p); if (unlikely(!vm)) { WARN(1, "Trying to vrealloc() nonexistent vm area (%p)\n", p); return NULL; } - old_size = get_vm_area_size(vm); + alloced_size = get_vm_area_size(vm); + old_size = vm->requested_size; + if (WARN(alloced_size < old_size, + "vrealloc() has mismatched area vs requested sizes (%p)\n", p)) + return NULL; } /* @@ -4088,14 +4093,26 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) * would be a good heuristic for when to shrink the vm_area? */ if (size <= old_size) { - /* Zero out spare memory. */ - if (want_init_on_alloc(flags)) + /* Zero out "freed" memory. */ + if (want_init_on_free()) memset((void *)p + size, 0, old_size - size); + vm->requested_size = size; kasan_poison_vmalloc(p + size, old_size - size); - kasan_unpoison_vmalloc(p, size, KASAN_VMALLOC_PROT_NORMAL); return (void *)p; } + /* + * We already have the bytes available in the allocation; use them. + */ + if (size <= alloced_size) { + kasan_unpoison_vmalloc(p + old_size, size - old_size, + KASAN_VMALLOC_PROT_NORMAL); + /* Zero out "alloced" memory. */ + if (want_init_on_alloc(flags)) + memset((void *)p + old_size, 0, size - old_size); + vm->requested_size = size; + } + /* TODO: Grow the vm_area, i.e. allocate and map additional pages. */ n = __vmalloc_noprof(size, flags); if (!n) From 22adb528621ddc92f887882a658507fbf88a5214 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 28 Apr 2025 18:49:34 +0530 Subject: [PATCH 512/559] selftests/mm: fix build break when compiling pkey_util.c Commit 50910acd6f615 ("selftests/mm: use sys_pkey helpers consistently") added a pkey_util.c to refactor some of the protection_keys functions accessible by other tests. But this broken the build in powerpc in two ways, pkey-powerpc.h: In function `arch_is_powervm': pkey-powerpc.h:73:21: error: storage size of `buf' isn't known 73 | struct stat buf; | ^~~ pkey-powerpc.h:75:14: error: implicit declaration of function `stat'; did you mean `strcat'? [-Wimplicit-function-declaration] 75 | if ((stat("/sys/firmware/devicetree/base/ibm,partition-name", &buf) == 0) && | ^~~~ | strcat Since pkey_util.c includes pkeys-helper.h, which in turn includes pkeys-powerpc.h, stat.h including is missing for "struct stat". This is fixed by adding "sys/stat.h" in pkeys-powerpc.h Secondly, pkey-powerpc.h:55:18: warning: format `%llx' expects argument of type `long long unsigned int', but argument 3 has type `u64' {aka `long unsigned int'} [-Wformat=] 55 | dprintf4("%s() changing %016llx to %016llx\n", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 56 | __func__, __read_pkey_reg(), pkey_reg); | ~~~~~~~~~~~~~~~~~ | | | u64 {aka long unsigned int} pkey-helpers.h:63:32: note: in definition of macro `dprintf_level' 63 | sigsafe_printf(args); \ | ^~~~ These format specifier related warning are removed by adding "__SANE_USERSPACE_TYPES__" to pkeys_utils.c. Link: https://lkml.kernel.org/r/20250428131937.641989-1-nysal@linux.ibm.com Fixes: 50910acd6f61 ("selftests/mm: use sys_pkey helpers consistently") Signed-off-by: Madhavan Srinivasan Signed-off-by: Nysal Jan K.A. Tested-by: Venkat Rao Bagalkote Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/pkey-powerpc.h | 2 ++ tools/testing/selftests/mm/pkey_util.c | 1 + 2 files changed, 3 insertions(+) diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index 1bad310d282a..d8ec906b8120 100644 --- a/tools/testing/selftests/mm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -3,6 +3,8 @@ #ifndef _PKEYS_POWERPC_H #define _PKEYS_POWERPC_H +#include + #ifndef SYS_pkey_alloc # define SYS_pkey_alloc 384 # define SYS_pkey_free 385 diff --git a/tools/testing/selftests/mm/pkey_util.c b/tools/testing/selftests/mm/pkey_util.c index ca4ad0d44ab2..255b332f7a08 100644 --- a/tools/testing/selftests/mm/pkey_util.c +++ b/tools/testing/selftests/mm/pkey_util.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#define __SANE_USERSPACE_TYPES__ #include #include From 8cf6ecb18baac867585fe1cba5dde6dbf3b6d29a Mon Sep 17 00:00:00 2001 From: "Nysal Jan K.A." Date: Mon, 28 Apr 2025 18:49:35 +0530 Subject: [PATCH 513/559] selftests/mm: fix a build failure on powerpc The compiler is unaware of the size of code generated by the ".rept" assembler directive. This results in the compiler emitting branch instructions where the offset to branch to exceeds the maximum allowed value, resulting in build failures like the following: CC protection_keys /tmp/ccypKWAE.s: Assembler messages: /tmp/ccypKWAE.s:2073: Error: operand out of range (0x0000000000020158 is not between 0xffffffffffff8000 and 0x0000000000007ffc) /tmp/ccypKWAE.s:2509: Error: operand out of range (0x0000000000020130 is not between 0xffffffffffff8000 and 0x0000000000007ffc) Fix the issue by manually adding nop instructions using the preprocessor. Link: https://lkml.kernel.org/r/20250428131937.641989-2-nysal@linux.ibm.com Fixes: 46036188ea1f ("selftests/mm: build with -O2") Reported-by: Madhavan Srinivasan Signed-off-by: Nysal Jan K.A. Tested-by: Venkat Rao Bagalkote Reviewed-by: Donet Tom Tested-by: Donet Tom Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/pkey-powerpc.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index d8ec906b8120..17bf2d1b0192 100644 --- a/tools/testing/selftests/mm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -104,8 +104,18 @@ static inline void expect_fault_on_read_execonly_key(void *p1, int pkey) return; } +#define REPEAT_8(s) s s s s s s s s +#define REPEAT_64(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) \ + REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) +#define REPEAT_512(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) \ + REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) +#define REPEAT_4096(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) \ + REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) +#define REPEAT_16384(s) REPEAT_4096(s) REPEAT_4096(s) \ + REPEAT_4096(s) REPEAT_4096(s) + /* 4-byte instructions * 16384 = 64K page */ -#define __page_o_noops() asm(".rept 16384 ; nop; .endr") +#define __page_o_noops() asm(REPEAT_16384("nop\n")) static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) { From 9a9794a81a8a1be8670aa673ec0f0fbfbddeccae Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 29 Apr 2025 17:48:03 +0800 Subject: [PATCH 514/559] mm, swap: fix false warning for large allocation with !THP_SWAP The !CONFIG_THP_SWAP check existed before just fine because slot cache would reject high order allocation and let the caller split all folios and try again. But slot cache is gone, so large allocation will directly go to the allocator, and the allocator should just fail silently to inform caller to do the folio split, this is totally fine and expected. Remove this meaningless warning. Link: https://lkml.kernel.org/r/20250429094803.85518-1-ryncsn@gmail.com Fixes: 0ff67f990bd4 ("mm, swap: remove swap slot cache") Signed-off-by: Kairui Song Reported-by: Heiko Carstens Closes: https://lore.kernel.org/linux-mm/20250428135252.25453B17-hca@linux.ibm.com/ Tested-by: Heiko Carstens Cc: Baoquan He Signed-off-by: Andrew Morton --- mm/swapfile.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 2eff8b51a945..f214843612dc 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1272,13 +1272,22 @@ int folio_alloc_swap(struct folio *folio, gfp_t gfp) VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio); - /* - * Should not even be attempting large allocations when huge - * page swap is disabled. Warn and fail the allocation. - */ - if (order && (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER)) { - VM_WARN_ON_ONCE(1); - return -EINVAL; + if (order) { + /* + * Reject large allocation when THP_SWAP is disabled, + * the caller should split the folio and try again. + */ + if (!IS_ENABLED(CONFIG_THP_SWAP)) + return -EAGAIN; + + /* + * Allocation size should never exceed cluster size + * (HPAGE_PMD_SIZE). + */ + if (size > SWAPFILE_CLUSTER) { + VM_WARN_ON_ONCE(1); + return -EINVAL; + } } local_lock(&percpu_swap_cluster.lock); From dac2a4f663c4bcd75add07aedf9d30c9f8e5ead5 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Thu, 1 May 2025 04:43:24 +0000 Subject: [PATCH 515/559] mm/hugetlb: copy the CMA flag when demoting Since commit d2d786714080 ("mm/hugetlb: enable bootmem allocation from CMA areas"), a flag is used to mark hugetlb folios as allocated from CMA. This flag is also used to decide if it should be freed to CMA. However, the flag isn't copied to the smaller folios when a hugetlb folio is broken up for demotion, which would cause it to be freed incorrectly. Fix this by copying the flag to the smaller order hugetlb pages created from the original one. Link: https://lkml.kernel.org/r/20250501044325.20365-1-fvdl@google.com Fixes: d2d786714080 ("mm/hugetlb: enable bootmem allocation from CMA areas") Signed-off-by: Frank van der Linden Reviewed-by: Anshuman Khandual Reviewed-by: Oscar Salvador Reviewed-by: Jane Chu Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e3e6ac991b9c..6ea1be71aa42 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4034,10 +4034,13 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst, list_for_each_entry_safe(folio, next, src_list, lru) { int i; + bool cma; if (folio_test_hugetlb_vmemmap_optimized(folio)) continue; + cma = folio_test_hugetlb_cma(folio); + list_del(&folio->lru); split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst)); @@ -4053,6 +4056,9 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst, new_folio->mapping = NULL; init_new_hugetlb_folio(dst, new_folio); + /* Copy the CMA flag so that it is freed correctly */ + if (cma) + folio_set_hugetlb_cma(new_folio); list_add(&new_folio->lru, &dst_list); } } From fb881cd7604536b17a1927fb0533f9a6982ffcc5 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 3 May 2025 14:33:14 +0900 Subject: [PATCH 516/559] nilfs2: fix deadlock warnings caused by lock dependency in init_nilfs() After commit c0e473a0d226 ("block: fix race between set_blocksize and read paths") was merged, set_blocksize() called by sb_set_blocksize() now locks the inode of the backing device file. As a result of this change, syzbot started reporting deadlock warnings due to a circular dependency involving the semaphore "ns_sem" of the nilfs object, the inode lock of the backing device file, and the locks that this inode lock is transitively dependent on. This is caused by a new lock dependency added by the above change, since init_nilfs() calls sb_set_blocksize() in the lock section of "ns_sem". However, these warnings are false positives because init_nilfs() is called in the early stage of the mount operation and the filesystem has not yet started. The reason why "ns_sem" is locked in init_nilfs() was to avoid a race condition in nilfs_fill_super() caused by sharing a nilfs object among multiple filesystem instances (super block structures) in the early implementation. However, nilfs objects and super block structures have long ago become one-to-one, and there is no longer any need to use the semaphore there. So, fix this issue by removing the use of the semaphore "ns_sem" in init_nilfs(). Link: https://lkml.kernel.org/r/20250503053327.12294-1-konishi.ryusuke@gmail.com Fixes: c0e473a0d226 ("block: fix race between set_blocksize and read paths") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=00f7f5b884b117ee6773 Tested-by: syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com Reported-by: syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=f30591e72bfc24d4715b Tested-by: syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com> Signed-off-by: Andrew Morton --- fs/nilfs2/the_nilfs.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index cb01ea81724d..d0bcf744c553 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -705,8 +705,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) int blocksize; int err; - down_write(&nilfs->ns_sem); - blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); if (!blocksize) { nilfs_err(sb, "unable to set blocksize"); @@ -779,7 +777,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) set_nilfs_init(nilfs); err = 0; out: - up_write(&nilfs->ns_sem); return err; failed_sbh: From 7b08b74f3d99f6b801250683c751d391128799ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Fri, 2 May 2025 23:50:19 +0200 Subject: [PATCH 517/559] mm: fix folio_pte_batch() on XEN PV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On XEN PV, folio_pte_batch() can incorrectly batch beyond the end of a folio due to a corner case in pte_advance_pfn(). Specifically, when the PFN following the folio maps to an invalidated MFN, expected_pte = pte_advance_pfn(expected_pte, nr); produces a pte_none(). If the actual next PTE in memory is also pte_none(), the pte_same() succeeds, if (!pte_same(pte, expected_pte)) break; the loop is not broken, and batching continues into unrelated memory. For example, with a 4-page folio, the PTE layout might look like this: [ 53.465673] [ T2552] folio_pte_batch: printing PTE values at addr=0x7f1ac9dc5000 [ 53.465674] [ T2552] PTE[453] = 000000010085c125 [ 53.465679] [ T2552] PTE[454] = 000000010085d125 [ 53.465682] [ T2552] PTE[455] = 000000010085e125 [ 53.465684] [ T2552] PTE[456] = 000000010085f125 [ 53.465686] [ T2552] PTE[457] = 0000000000000000 <-- not present [ 53.465689] [ T2552] PTE[458] = 0000000101da7125 pte_advance_pfn(PTE[456]) returns a pte_none() due to invalid PFN->MFN mapping. The next actual PTE (PTE[457]) is also pte_none(), so the loop continues and includes PTE[457] in the batch, resulting in 5 batched entries for a 4-page folio. This triggers the following warning: [ 53.465751] [ T2552] page: refcount:85 mapcount:20 mapping:ffff88813ff4f6a8 index:0x110 pfn:0x10085c [ 53.465754] [ T2552] head: order:2 mapcount:80 entire_mapcount:0 nr_pages_mapped:4 pincount:0 [ 53.465756] [ T2552] memcg:ffff888003573000 [ 53.465758] [ T2552] aops:0xffffffff8226fd20 ino:82467c dentry name(?):"libc.so.6" [ 53.465761] [ T2552] flags: 0x2000000000416c(referenced|uptodate|lru|active|private|head|node=0|zone=2) [ 53.465764] [ T2552] raw: 002000000000416c ffffea0004021f08 ffffea0004021908 ffff88813ff4f6a8 [ 53.465767] [ T2552] raw: 0000000000000110 ffff888133d8bd40 0000005500000013 ffff888003573000 [ 53.465768] [ T2552] head: 002000000000416c ffffea0004021f08 ffffea0004021908 ffff88813ff4f6a8 [ 53.465770] [ T2552] head: 0000000000000110 ffff888133d8bd40 0000005500000013 ffff888003573000 [ 53.465772] [ T2552] head: 0020000000000202 ffffea0004021701 000000040000004f 00000000ffffffff [ 53.465774] [ T2552] head: 0000000300000003 8000000300000002 0000000000000013 0000000000000004 [ 53.465775] [ T2552] page dumped because: VM_WARN_ON_FOLIO((_Generic((page + nr_pages - 1), const struct page *: (const struct folio *)_compound_head(page + nr_pages - 1), struct page *: (struct folio *)_compound_head(page + nr_pages - 1))) != folio) Original code works as expected everywhere, except on XEN PV, where pte_advance_pfn() can yield a pte_none() after balloon inflation due to MFNs invalidation. In XEN, pte_advance_pfn() ends up calling __pte()->xen_make_pte()->pte_pfn_to_mfn(), which returns pte_none() when mfn == INVALID_P2M_ENTRY. The pte_pfn_to_mfn() documents that nastiness: If there's no mfn for the pfn, then just create an empty non-present pte. Unfortunately this loses information about the original pfn, so pte_mfn_to_pfn is asymmetric. While such hacks should certainly be removed, we can do better in folio_pte_batch() and simply check ahead of time how many PTEs we can possibly batch in our folio. This way, we can not only fix the issue but cleanup the code: removing the pte_pfn() check inside the loop body and avoiding end_ptr comparison + arithmetic. Link: https://lkml.kernel.org/r/20250502215019.822-2-arkamar@atlas.cz Fixes: f8d937761d65 ("mm/memory: optimize fork() with PTE-mapped THP") Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Signed-off-by: Petr Vaněk Cc: Ryan Roberts Cc: Signed-off-by: Andrew Morton --- mm/internal.h | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index e9695baa5922..25a29872c634 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -248,11 +248,9 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr, pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags, bool *any_writable, bool *any_young, bool *any_dirty) { - unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio); - const pte_t *end_ptep = start_ptep + max_nr; pte_t expected_pte, *ptep; bool writable, young, dirty; - int nr; + int nr, cur_nr; if (any_writable) *any_writable = false; @@ -265,11 +263,15 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr, VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio); VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pte_pfn(pte))) != folio, folio); + /* Limit max_nr to the actual remaining PFNs in the folio we could batch. */ + max_nr = min_t(unsigned long, max_nr, + folio_pfn(folio) + folio_nr_pages(folio) - pte_pfn(pte)); + nr = pte_batch_hint(start_ptep, pte); expected_pte = __pte_batch_clear_ignored(pte_advance_pfn(pte, nr), flags); ptep = start_ptep + nr; - while (ptep < end_ptep) { + while (nr < max_nr) { pte = ptep_get(ptep); if (any_writable) writable = !!pte_write(pte); @@ -282,14 +284,6 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr, if (!pte_same(pte, expected_pte)) break; - /* - * Stop immediately once we reached the end of the folio. In - * corner cases the next PFN might fall into a different - * folio. - */ - if (pte_pfn(pte) >= folio_end_pfn) - break; - if (any_writable) *any_writable |= writable; if (any_young) @@ -297,12 +291,13 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr, if (any_dirty) *any_dirty |= dirty; - nr = pte_batch_hint(ptep, pte); - expected_pte = pte_advance_pfn(expected_pte, nr); - ptep += nr; + cur_nr = pte_batch_hint(ptep, pte); + expected_pte = pte_advance_pfn(expected_pte, cur_nr); + ptep += cur_nr; + nr += cur_nr; } - return min(ptep - start_ptep, max_nr); + return min(nr, max_nr); } /** From f34343cc11afc7bb1f881c3492bee3484016bf71 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 08:59:39 -0700 Subject: [PATCH 518/559] fbnic: Fix initialization of mailbox descriptor rings Address to issues with the FW mailbox descriptor initialization. We need to reverse the order of accesses when we invalidate an entry versus writing an entry. When writing an entry we write upper and then lower as the lower 32b contain the valid bit that makes the entire address valid. However for invalidation we should write it in the reverse order so that the upper is marked invalid before we update it. Without this change we may see FW attempt to access pages with the upper 32b of the address set to 0 which will likely result in DMAR faults due to write access failures on mailbox shutdown. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654717972.499179.8083789731819297034.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 32 ++++++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 88db3dacb940..c4956f0a741e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -17,11 +17,29 @@ static void __fbnic_mbx_wr_desc(struct fbnic_dev *fbd, int mbx_idx, { u32 desc_offset = FBNIC_IPC_MBX(mbx_idx, desc_idx); + /* Write the upper 32b and then the lower 32b. Doing this the + * FW can then read lower, upper, lower to verify that the state + * of the descriptor wasn't changed mid-transaction. + */ fw_wr32(fbd, desc_offset + 1, upper_32_bits(desc)); fw_wrfl(fbd); fw_wr32(fbd, desc_offset, lower_32_bits(desc)); } +static void __fbnic_mbx_invalidate_desc(struct fbnic_dev *fbd, int mbx_idx, + int desc_idx, u32 desc) +{ + u32 desc_offset = FBNIC_IPC_MBX(mbx_idx, desc_idx); + + /* For initialization we write the lower 32b of the descriptor first. + * This way we can set the state to mark it invalid before we clear the + * upper 32b. + */ + fw_wr32(fbd, desc_offset, desc); + fw_wrfl(fbd); + fw_wr32(fbd, desc_offset + 1, 0); +} + static u64 __fbnic_mbx_rd_desc(struct fbnic_dev *fbd, int mbx_idx, int desc_idx) { u32 desc_offset = FBNIC_IPC_MBX(mbx_idx, desc_idx); @@ -41,21 +59,17 @@ static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) * solid stop for the firmware to hit when it is done looping * through the ring. */ - __fbnic_mbx_wr_desc(fbd, mbx_idx, 0, 0); - - fw_wrfl(fbd); + __fbnic_mbx_invalidate_desc(fbd, mbx_idx, 0, 0); /* We then fill the rest of the ring starting at the end and moving * back toward descriptor 0 with skip descriptors that have no * length nor address, and tell the firmware that they can skip * them and just move past them to the one we initialized to 0. */ - for (desc_idx = FBNIC_IPC_MBX_DESC_LEN; --desc_idx;) { - __fbnic_mbx_wr_desc(fbd, mbx_idx, desc_idx, - FBNIC_IPC_MBX_DESC_FW_CMPL | - FBNIC_IPC_MBX_DESC_HOST_CMPL); - fw_wrfl(fbd); - } + for (desc_idx = FBNIC_IPC_MBX_DESC_LEN; --desc_idx;) + __fbnic_mbx_invalidate_desc(fbd, mbx_idx, desc_idx, + FBNIC_IPC_MBX_DESC_FW_CMPL | + FBNIC_IPC_MBX_DESC_HOST_CMPL); } void fbnic_mbx_init(struct fbnic_dev *fbd) From 3b12f00ddd08e888273b2ac0488d396d90a836fc Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 08:59:46 -0700 Subject: [PATCH 519/559] fbnic: Gate AXI read/write enabling on FW mailbox In order to prevent the device from throwing spurious writes and/or reads at us we need to gate the AXI fabric interface to the PCIe until such time as we know the FW is in a known good state. To accomplish this we use the mailbox as a mechanism for us to recognize that the FW has acknowledged our presence and is no longer sending any stale message data to us. We start in fbnic_mbx_init by calling fbnic_mbx_reset_desc_ring function, disabling the DMA in both directions, and then invalidating all the descriptors in each ring. We then poll the mailbox in fbnic_mbx_poll_tx_ready and when the interrupt is set by the FW we pick it up and mark the mailboxes as ready, while also enabling the DMA. Once we have completed all the transactions and need to shut down we call into fbnic_mbx_clean which will in turn call fbnic_mbx_reset_desc_ring for each ring and shut down the DMA and once again invalidate the descriptors. Fixes: 3646153161f1 ("eth: fbnic: Add register init to set PCIe/Ethernet device config") Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654718623.499179.7445197308109347982.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_csr.h | 2 ++ drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 38 +++++++++++++++++---- drivers/net/ethernet/meta/fbnic/fbnic_mac.c | 6 ---- 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index 3b12a0ab5906..51bee8072420 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -796,8 +796,10 @@ enum { /* PUL User Registers */ #define FBNIC_CSR_START_PUL_USER 0x31000 /* CSR section delimiter */ #define FBNIC_PUL_OB_TLP_HDR_AW_CFG 0x3103d /* 0xc40f4 */ +#define FBNIC_PUL_OB_TLP_HDR_AW_CFG_FLUSH CSR_BIT(19) #define FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME CSR_BIT(18) #define FBNIC_PUL_OB_TLP_HDR_AR_CFG 0x3103e /* 0xc40f8 */ +#define FBNIC_PUL_OB_TLP_HDR_AR_CFG_FLUSH CSR_BIT(19) #define FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME CSR_BIT(18) #define FBNIC_PUL_USER_OB_RD_TLP_CNT_31_0 \ 0x3106e /* 0xc41b8 */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index c4956f0a741e..f4749bfd840c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -51,10 +51,26 @@ static u64 __fbnic_mbx_rd_desc(struct fbnic_dev *fbd, int mbx_idx, int desc_idx) return desc; } -static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) +static void fbnic_mbx_reset_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { int desc_idx; + /* Disable DMA transactions from the device, + * and flush any transactions triggered during cleaning + */ + switch (mbx_idx) { + case FBNIC_IPC_MBX_RX_IDX: + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AW_CFG, + FBNIC_PUL_OB_TLP_HDR_AW_CFG_FLUSH); + break; + case FBNIC_IPC_MBX_TX_IDX: + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, + FBNIC_PUL_OB_TLP_HDR_AR_CFG_FLUSH); + break; + } + + wrfl(fbd); + /* Initialize first descriptor to all 0s. Doing this gives us a * solid stop for the firmware to hit when it is done looping * through the ring. @@ -90,7 +106,7 @@ void fbnic_mbx_init(struct fbnic_dev *fbd) wr32(fbd, FBNIC_INTR_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) - fbnic_mbx_init_desc_ring(fbd, i); + fbnic_mbx_reset_desc_ring(fbd, i); } static int fbnic_mbx_map_msg(struct fbnic_dev *fbd, int mbx_idx, @@ -155,7 +171,7 @@ static void fbnic_mbx_clean_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { int i; - fbnic_mbx_init_desc_ring(fbd, mbx_idx); + fbnic_mbx_reset_desc_ring(fbd, mbx_idx); for (i = FBNIC_IPC_MBX_DESC_LEN; i--;) fbnic_mbx_unmap_and_free_msg(fbd, mbx_idx, i); @@ -354,7 +370,7 @@ static int fbnic_fw_xmit_cap_msg(struct fbnic_dev *fbd) return (err == -EOPNOTSUPP) ? 0 : err; } -static void fbnic_mbx_postinit_desc_ring(struct fbnic_dev *fbd, int mbx_idx) +static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { struct fbnic_fw_mbx *mbx = &fbd->mbx[mbx_idx]; @@ -366,10 +382,18 @@ static void fbnic_mbx_postinit_desc_ring(struct fbnic_dev *fbd, int mbx_idx) switch (mbx_idx) { case FBNIC_IPC_MBX_RX_IDX: + /* Enable DMA writes from the device */ + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AW_CFG, + FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME); + /* Make sure we have a page for the FW to write to */ fbnic_mbx_alloc_rx_msgs(fbd); break; case FBNIC_IPC_MBX_TX_IDX: + /* Enable DMA reads from the device */ + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, + FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME); + /* Force version to 1 if we successfully requested an update * from the firmware. This should be overwritten once we get * the actual version from the firmware in the capabilities @@ -386,7 +410,7 @@ static void fbnic_mbx_postinit(struct fbnic_dev *fbd) { int i; - /* We only need to do this on the first interrupt following init. + /* We only need to do this on the first interrupt following reset. * this primes the mailbox so that we will have cleared all the * skip descriptors. */ @@ -396,7 +420,7 @@ static void fbnic_mbx_postinit(struct fbnic_dev *fbd) wr32(fbd, FBNIC_INTR_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) - fbnic_mbx_postinit_desc_ring(fbd, i); + fbnic_mbx_init_desc_ring(fbd, i); } /** @@ -894,7 +918,7 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) * avoid the mailbox getting stuck closed if the interrupt * is reset. */ - fbnic_mbx_init_desc_ring(fbd, FBNIC_IPC_MBX_TX_IDX); + fbnic_mbx_reset_desc_ring(fbd, FBNIC_IPC_MBX_TX_IDX); msleep(200); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c index 14291401f463..dde4a37116e2 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c @@ -79,12 +79,6 @@ static void fbnic_mac_init_axi(struct fbnic_dev *fbd) fbnic_init_readrq(fbd, FBNIC_QM_RNI_RBP_CTL, cls, readrq); fbnic_init_mps(fbd, FBNIC_QM_RNI_RDE_CTL, cls, mps); fbnic_init_mps(fbd, FBNIC_QM_RNI_RCM_CTL, cls, mps); - - /* Enable XALI AR/AW outbound */ - wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AW_CFG, - FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME); - wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, - FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME); } static void fbnic_mac_init_qm(struct fbnic_dev *fbd) From 682a61281d1036962b68d651994cc407371daef5 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 08:59:52 -0700 Subject: [PATCH 520/559] fbnic: Add additional handling of IRQs We have two issues that need to be addressed in our IRQ handling. One is the fact that we can end up double-freeing IRQs in the event of an exception handling error such as a PCIe reset/recovery that fails. To prevent that from becoming an issue we can use the msix_vector values to indicate that we have successfully requested/freed the IRQ by only setting or clearing them when we have completed the given action. The other issue is that we have several potential races in our IRQ path due to us manipulating the mask before the vector has been truly disabled. In order to handle that in the case of the FW mailbox we need to not auto-enable the IRQ and instead will be enabling/disabling it separately. In the case of the PCS vector we can mitigate this by unmapping it and synchronizing the IRQ before we clear the mask. The general order of operations after this change is now to request the interrupt, poll the FW mailbox to ready, and then enable the interrupt. For the shutdown we do the reverse where we disable the interrupt, flush any pending Tx, and then free the IRQ. I am renaming the enable/disable to request/free to be equivilent with the IRQ calls being used. We may see additions in the future to enable/disable the IRQs versus request/free them for certain use cases. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Fixes: 69684376eed5 ("eth: fbnic: Add link detection") Signed-off-by: Alexander Duyck Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654719271.499179.3634535105127848325.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic.h | 8 +- drivers/net/ethernet/meta/fbnic/fbnic_irq.c | 148 ++++++++++++------ .../net/ethernet/meta/fbnic/fbnic_netdev.c | 5 +- drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 14 +- 4 files changed, 113 insertions(+), 62 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index 4ca7b99ef131..de6b1a340f55 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -154,14 +154,14 @@ struct fbnic_dev *fbnic_devlink_alloc(struct pci_dev *pdev); void fbnic_devlink_register(struct fbnic_dev *fbd); void fbnic_devlink_unregister(struct fbnic_dev *fbd); -int fbnic_fw_enable_mbx(struct fbnic_dev *fbd); -void fbnic_fw_disable_mbx(struct fbnic_dev *fbd); +int fbnic_fw_request_mbx(struct fbnic_dev *fbd); +void fbnic_fw_free_mbx(struct fbnic_dev *fbd); void fbnic_hwmon_register(struct fbnic_dev *fbd); void fbnic_hwmon_unregister(struct fbnic_dev *fbd); -int fbnic_pcs_irq_enable(struct fbnic_dev *fbd); -void fbnic_pcs_irq_disable(struct fbnic_dev *fbd); +int fbnic_pcs_request_irq(struct fbnic_dev *fbd); +void fbnic_pcs_free_irq(struct fbnic_dev *fbd); void fbnic_napi_name_irqs(struct fbnic_dev *fbd); int fbnic_napi_request_irq(struct fbnic_dev *fbd, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c index 1bbc0e56f3a0..1c88a2bf3a7a 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c @@ -19,69 +19,105 @@ static irqreturn_t fbnic_fw_msix_intr(int __always_unused irq, void *data) return IRQ_HANDLED; } -/** - * fbnic_fw_enable_mbx - Configure and initialize Firmware Mailbox - * @fbd: Pointer to device to initialize - * - * This function will initialize the firmware mailbox rings, enable the IRQ - * and initialize the communication between the Firmware and the host. The - * firmware is expected to respond to the initialization by sending an - * interrupt essentially notifying the host that it has seen the - * initialization and is now synced up. - * - * Return: non-zero on failure. - **/ -int fbnic_fw_enable_mbx(struct fbnic_dev *fbd) +static int __fbnic_fw_enable_mbx(struct fbnic_dev *fbd, int vector) { - u32 vector = fbd->fw_msix_vector; int err; - /* Request the IRQ for FW Mailbox vector. */ - err = request_threaded_irq(vector, NULL, &fbnic_fw_msix_intr, - IRQF_ONESHOT, dev_name(fbd->dev), fbd); - if (err) - return err; - /* Initialize mailbox and attempt to poll it into ready state */ fbnic_mbx_init(fbd); err = fbnic_mbx_poll_tx_ready(fbd); if (err) { dev_warn(fbd->dev, "FW mailbox did not enter ready state\n"); - free_irq(vector, fbd); return err; } - /* Enable interrupts */ + /* Enable interrupt and unmask the vector */ + enable_irq(vector); fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); return 0; } /** - * fbnic_fw_disable_mbx - Disable mailbox and place it in standby state - * @fbd: Pointer to device to disable + * fbnic_fw_request_mbx - Configure and initialize Firmware Mailbox + * @fbd: Pointer to device to initialize * - * This function will disable the mailbox interrupt, free any messages still - * in the mailbox and place it into a standby state. The firmware is - * expected to see the update and assume that the host is in the reset state. + * This function will allocate the IRQ and then reinitialize the mailbox + * starting communication between the host and firmware. + * + * Return: non-zero on failure. **/ -void fbnic_fw_disable_mbx(struct fbnic_dev *fbd) +int fbnic_fw_request_mbx(struct fbnic_dev *fbd) { - /* Disable interrupt and free vector */ - fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(0), 1u << FBNIC_FW_MSIX_ENTRY); + struct pci_dev *pdev = to_pci_dev(fbd->dev); + int vector, err; - /* Free the vector */ - free_irq(fbd->fw_msix_vector, fbd); + WARN_ON(fbd->fw_msix_vector); + + vector = pci_irq_vector(pdev, FBNIC_FW_MSIX_ENTRY); + if (vector < 0) + return vector; + + /* Request the IRQ for FW Mailbox vector. */ + err = request_threaded_irq(vector, NULL, &fbnic_fw_msix_intr, + IRQF_ONESHOT | IRQF_NO_AUTOEN, + dev_name(fbd->dev), fbd); + if (err) + return err; + + /* Initialize mailbox and attempt to poll it into ready state */ + err = __fbnic_fw_enable_mbx(fbd, vector); + if (err) + free_irq(vector, fbd); + + fbd->fw_msix_vector = vector; + + return err; +} + +/** + * fbnic_fw_disable_mbx - Temporarily place mailbox in standby state + * @fbd: Pointer to device + * + * Shutdown the mailbox by notifying the firmware to stop sending us logs, mask + * and synchronize the IRQ, and then clean up the rings. + **/ +static void fbnic_fw_disable_mbx(struct fbnic_dev *fbd) +{ + /* Disable interrupt and synchronize the IRQ */ + disable_irq(fbd->fw_msix_vector); + + /* Mask the vector */ + fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(0), 1u << FBNIC_FW_MSIX_ENTRY); /* Make sure disabling logs message is sent, must be done here to * avoid risk of completing without a running interrupt. */ fbnic_mbx_flush_tx(fbd); - - /* Reset the mailboxes to the initialized state */ fbnic_mbx_clean(fbd); } +/** + * fbnic_fw_free_mbx - Disable mailbox and place it in standby state + * @fbd: Pointer to device to disable + * + * This function will disable the mailbox interrupt, free any messages still + * in the mailbox and place it into a disabled state. The firmware is + * expected to see the update and assume that the host is in the reset state. + **/ +void fbnic_fw_free_mbx(struct fbnic_dev *fbd) +{ + /* Vector has already been freed */ + if (!fbd->fw_msix_vector) + return; + + fbnic_fw_disable_mbx(fbd); + + /* Free the vector */ + free_irq(fbd->fw_msix_vector, fbd); + fbd->fw_msix_vector = 0; +} + static irqreturn_t fbnic_pcs_msix_intr(int __always_unused irq, void *data) { struct fbnic_dev *fbd = data; @@ -101,7 +137,7 @@ static irqreturn_t fbnic_pcs_msix_intr(int __always_unused irq, void *data) } /** - * fbnic_pcs_irq_enable - Configure the MAC to enable it to advertise link + * fbnic_pcs_request_irq - Configure the PCS to enable it to advertise link * @fbd: Pointer to device to initialize * * This function provides basic bringup for the MAC/PCS IRQ. For now the IRQ @@ -109,41 +145,61 @@ static irqreturn_t fbnic_pcs_msix_intr(int __always_unused irq, void *data) * * Return: non-zero on failure. **/ -int fbnic_pcs_irq_enable(struct fbnic_dev *fbd) +int fbnic_pcs_request_irq(struct fbnic_dev *fbd) { - u32 vector = fbd->pcs_msix_vector; - int err; + struct pci_dev *pdev = to_pci_dev(fbd->dev); + int vector, err; - /* Request the IRQ for MAC link vector. - * Map MAC cause to it, and unmask it + WARN_ON(fbd->pcs_msix_vector); + + vector = pci_irq_vector(pdev, FBNIC_PCS_MSIX_ENTRY); + if (vector < 0) + return vector; + + /* Request the IRQ for PCS link vector. + * Map PCS cause to it, and unmask it */ err = request_irq(vector, &fbnic_pcs_msix_intr, 0, fbd->netdev->name, fbd); if (err) return err; + /* Map and enable interrupt, unmask vector after link is configured */ fbnic_wr32(fbd, FBNIC_INTR_MSIX_CTRL(FBNIC_INTR_MSIX_CTRL_PCS_IDX), FBNIC_PCS_MSIX_ENTRY | FBNIC_INTR_MSIX_CTRL_ENABLE); + fbd->pcs_msix_vector = vector; + return 0; } /** - * fbnic_pcs_irq_disable - Teardown the MAC IRQ to prepare for stopping + * fbnic_pcs_free_irq - Teardown the PCS IRQ to prepare for stopping * @fbd: Pointer to device that is stopping * - * This function undoes the work done in fbnic_pcs_irq_enable and prepares + * This function undoes the work done in fbnic_pcs_request_irq and prepares * the device to no longer receive traffic on the host interface. **/ -void fbnic_pcs_irq_disable(struct fbnic_dev *fbd) +void fbnic_pcs_free_irq(struct fbnic_dev *fbd) { + /* Vector has already been freed */ + if (!fbd->pcs_msix_vector) + return; + /* Disable interrupt */ fbnic_wr32(fbd, FBNIC_INTR_MSIX_CTRL(FBNIC_INTR_MSIX_CTRL_PCS_IDX), FBNIC_PCS_MSIX_ENTRY); + fbnic_wrfl(fbd); + + /* Synchronize IRQ to prevent race that would unmask vector */ + synchronize_irq(fbd->pcs_msix_vector); + + /* Mask the vector */ fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(0), 1u << FBNIC_PCS_MSIX_ENTRY); /* Free the vector */ free_irq(fbd->pcs_msix_vector, fbd); + fbd->pcs_msix_vector = 0; } void fbnic_synchronize_irq(struct fbnic_dev *fbd, int nr) @@ -226,9 +282,6 @@ void fbnic_free_irqs(struct fbnic_dev *fbd) { struct pci_dev *pdev = to_pci_dev(fbd->dev); - fbd->pcs_msix_vector = 0; - fbd->fw_msix_vector = 0; - fbd->num_irqs = 0; pci_free_irq_vectors(pdev); @@ -254,8 +307,5 @@ int fbnic_alloc_irqs(struct fbnic_dev *fbd) fbd->num_irqs = num_irqs; - fbd->pcs_msix_vector = pci_irq_vector(pdev, FBNIC_PCS_MSIX_ENTRY); - fbd->fw_msix_vector = pci_irq_vector(pdev, FBNIC_FW_MSIX_ENTRY); - return 0; } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index 79a01fdd1dd1..2524d9b88d59 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -44,9 +44,10 @@ int __fbnic_open(struct fbnic_net *fbn) if (err) goto time_stop; - err = fbnic_pcs_irq_enable(fbd); + err = fbnic_pcs_request_irq(fbd); if (err) goto time_stop; + /* Pull the BMC config and initialize the RPC */ fbnic_bmc_rpc_init(fbd); fbnic_rss_reinit(fbd, fbn); @@ -82,7 +83,7 @@ static int fbnic_stop(struct net_device *netdev) struct fbnic_net *fbn = netdev_priv(netdev); fbnic_down(fbn); - fbnic_pcs_irq_disable(fbn->fbd); + fbnic_pcs_free_irq(fbn->fbd); fbnic_time_stop(fbn); fbnic_fw_xmit_ownership_msg(fbn->fbd, false); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 6cbbc2ee3e1f..4e8595239c0f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -283,7 +283,7 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto free_irqs; } - err = fbnic_fw_enable_mbx(fbd); + err = fbnic_fw_request_mbx(fbd); if (err) { dev_err(&pdev->dev, "Firmware mailbox initialization failure\n"); @@ -363,7 +363,7 @@ static void fbnic_remove(struct pci_dev *pdev) fbnic_hwmon_unregister(fbd); fbnic_dbg_fbd_exit(fbd); fbnic_devlink_unregister(fbd); - fbnic_fw_disable_mbx(fbd); + fbnic_fw_free_mbx(fbd); fbnic_free_irqs(fbd); fbnic_devlink_free(fbd); @@ -387,7 +387,7 @@ static int fbnic_pm_suspend(struct device *dev) rtnl_unlock(); null_uc_addr: - fbnic_fw_disable_mbx(fbd); + fbnic_fw_free_mbx(fbd); /* Free the IRQs so they aren't trying to occupy sleeping CPUs */ fbnic_free_irqs(fbd); @@ -420,7 +420,7 @@ static int __fbnic_pm_resume(struct device *dev) fbd->mac->init_regs(fbd); /* Re-enable mailbox */ - err = fbnic_fw_enable_mbx(fbd); + err = fbnic_fw_request_mbx(fbd); if (err) goto err_free_irqs; @@ -438,15 +438,15 @@ static int __fbnic_pm_resume(struct device *dev) if (netif_running(netdev)) { err = __fbnic_open(fbn); if (err) - goto err_disable_mbx; + goto err_free_mbx; } rtnl_unlock(); return 0; -err_disable_mbx: +err_free_mbx: rtnl_unlock(); - fbnic_fw_disable_mbx(fbd); + fbnic_fw_free_mbx(fbd); err_free_irqs: fbnic_free_irqs(fbd); err_invalidate_uc_addr: From 0f9a959a0addd9bbc47e5d16c36b3a7f97981915 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 08:59:59 -0700 Subject: [PATCH 521/559] fbnic: Actually flush_tx instead of stalling out The fbnic_mbx_flush_tx function had a number of issues. First, we were waiting 200ms for the firmware to process the packets. We can drop this to 20ms and in almost all cases this should be more than enough time. So by changing this we can significantly reduce shutdown time. Second, we were not making sure that the Tx path was actually shut off. As such we could still have packets added while we were flushing the mailbox. To prevent that we can now clear the ready flag for the Tx side and it should stay down since the interrupt is disabled. Third, we kept re-reading the tail due to the second issue. The tail should not move after we have started the flush so we can just read it once while we are holding the mailbox Tx lock. By doing that we are guaranteed that the value should be consistent. Fourth, we were keeping a count of descriptors cleaned due to the second and third issues called out. That count is not a valid reason to be exiting the cleanup, and with the tail only being read once we shouldn't see any cases where the tail moves after the disable so the tracking of count can be dropped. Fifth, we were using attempts * sleep time to determine how long we would wait in our polling loop to flush out the Tx. This can be very imprecise. In order to tighten up the timing we are shifting over to using a jiffies value of jiffies + 10 * HZ + 1 to determine the jiffies value we should stop polling at as this should be accurate within once sleep cycle for the total amount of time spent polling. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654719929.499179.16406653096197423749.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 31 +++++++++++----------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index f4749bfd840c..d019191d6ae9 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -930,35 +930,36 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) { + unsigned long timeout = jiffies + 10 * HZ + 1; struct fbnic_fw_mbx *tx_mbx; - int attempts = 50; - u8 count = 0; - - /* Nothing to do if there is no mailbox */ - if (!fbnic_fw_present(fbd)) - return; + u8 tail; /* Record current Rx stats */ tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; - /* Nothing to do if mailbox never got to ready */ - if (!tx_mbx->ready) - return; + spin_lock_irq(&fbd->fw_tx_lock); + + /* Clear ready to prevent any further attempts to transmit */ + tx_mbx->ready = false; + + /* Read tail to determine the last tail state for the ring */ + tail = tx_mbx->tail; + + spin_unlock_irq(&fbd->fw_tx_lock); /* Give firmware time to process packet, - * we will wait up to 10 seconds which is 50 waits of 200ms. + * we will wait up to 10 seconds which is 500 waits of 20ms. */ do { u8 head = tx_mbx->head; - if (head == tx_mbx->tail) + /* Tx ring is empty once head == tail */ + if (head == tail) break; - msleep(200); + msleep(20); fbnic_mbx_process_tx_msgs(fbd); - - count += (tx_mbx->head - head) % FBNIC_IPC_MBX_DESC_LEN; - } while (count < FBNIC_IPC_MBX_DESC_LEN && --attempts); + } while (time_is_after_jiffies(timeout)); } void fbnic_get_fw_ver_commit_str(struct fbnic_dev *fbd, char *fw_version, From cdbb2dc3996a60ed3d7431c1239a8ca98c778e04 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 09:00:05 -0700 Subject: [PATCH 522/559] fbnic: Cleanup handling of completions There was an issue in that if we were to shutdown we could be left with a completion in flight as the mailbox went away. To address that I have added an fbnic_mbx_evict_all_cmpl function that is meant to essentially create a "broken pipe" type response so that all callers will receive an error indicating that the connection has been broken as a result of us shutting down the mailbox. Fixes: 378e5cc1c6c6 ("eth: fbnic: hwmon: Add completion infrastructure for firmware requests") Signed-off-by: Alexander Duyck Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654720578.499179.380252598204530873.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index d019191d6ae9..732875aae46c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -928,6 +928,20 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) return attempts ? 0 : -ETIMEDOUT; } +static void __fbnic_fw_evict_cmpl(struct fbnic_fw_completion *cmpl_data) +{ + cmpl_data->result = -EPIPE; + complete(&cmpl_data->done); +} + +static void fbnic_mbx_evict_all_cmpl(struct fbnic_dev *fbd) +{ + if (fbd->cmpl_data) { + __fbnic_fw_evict_cmpl(fbd->cmpl_data); + fbd->cmpl_data = NULL; + } +} + void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) { unsigned long timeout = jiffies + 10 * HZ + 1; @@ -945,6 +959,9 @@ void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) /* Read tail to determine the last tail state for the ring */ tail = tx_mbx->tail; + /* Flush any completions as we are no longer processing Rx */ + fbnic_mbx_evict_all_cmpl(fbd); + spin_unlock_irq(&fbd->fw_tx_lock); /* Give firmware time to process packet, From ab064f6005973d456f95ae99cd9ea0d8ab676cce Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 09:00:12 -0700 Subject: [PATCH 523/559] fbnic: Improve responsiveness of fbnic_mbx_poll_tx_ready There were a couple different issues found in fbnic_mbx_poll_tx_ready. Among them were the fact that we were sleeping much longer than we actually needed to as the actual FW could respond in under 20ms. The other issue was that we would just keep polling the mailbox even if the device itself had gone away. To address the responsiveness issues we can decrease the sleeps to 20ms and use a jiffies based timeout value rather than just counting the number of times we slept and then polled. To address the hardware going away we can move the check for the firmware BAR being present from where it was and place it inside the loop after the mailbox descriptor ring is initialized and before we sleep so that we just abort and return an error if the device went away during initialization. With these two changes we see a significant improvement in boot times for the driver. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654721224.499179.2698616208976624755.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 732875aae46c..d344b454f28b 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -905,27 +905,30 @@ void fbnic_mbx_poll(struct fbnic_dev *fbd) int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) { + unsigned long timeout = jiffies + 10 * HZ + 1; struct fbnic_fw_mbx *tx_mbx; - int attempts = 50; - - /* Immediate fail if BAR4 isn't there */ - if (!fbnic_fw_present(fbd)) - return -ENODEV; tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; - while (!tx_mbx->ready && --attempts) { + while (!tx_mbx->ready) { + if (!time_is_after_jiffies(timeout)) + return -ETIMEDOUT; + /* Force the firmware to trigger an interrupt response to * avoid the mailbox getting stuck closed if the interrupt * is reset. */ fbnic_mbx_reset_desc_ring(fbd, FBNIC_IPC_MBX_TX_IDX); - msleep(200); + /* Immediate fail if BAR4 went away */ + if (!fbnic_fw_present(fbd)) + return -ENODEV; + + msleep(20); fbnic_mbx_poll(fbd); } - return attempts ? 0 : -ETIMEDOUT; + return 0; } static void __fbnic_fw_evict_cmpl(struct fbnic_fw_completion *cmpl_data) From 1b34d1c1dc8384884febd83140c9afbc7c4b9eb8 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 09:00:18 -0700 Subject: [PATCH 524/559] fbnic: Pull fbnic_fw_xmit_cap_msg use out of interrupt context This change pulls the call to fbnic_fw_xmit_cap_msg out of fbnic_mbx_init_desc_ring and instead places it in the polling function for getting the Tx ready. Doing that we can avoid the potential issue with an interrupt coming in later from the firmware that causes it to get fired in interrupt context. Fixes: 20d2e88cc746 ("eth: fbnic: Add initial messaging to notify FW of our presence") Signed-off-by: Alexander Duyck Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654721876.499179.9839651602256668493.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 43 ++++++++-------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index d344b454f28b..90a45c701543 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -352,24 +352,6 @@ static int fbnic_fw_xmit_simple_msg(struct fbnic_dev *fbd, u32 msg_type) return err; } -/** - * fbnic_fw_xmit_cap_msg - Allocate and populate a FW capabilities message - * @fbd: FBNIC device structure - * - * Return: NULL on failure to allocate, error pointer on error, or pointer - * to new TLV test message. - * - * Sends a single TLV header indicating the host wants the firmware to - * confirm the capabilities and version. - **/ -static int fbnic_fw_xmit_cap_msg(struct fbnic_dev *fbd) -{ - int err = fbnic_fw_xmit_simple_msg(fbd, FBNIC_TLV_MSG_ID_HOST_CAP_REQ); - - /* Return 0 if we are not calling this on ASIC */ - return (err == -EOPNOTSUPP) ? 0 : err; -} - static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { struct fbnic_fw_mbx *mbx = &fbd->mbx[mbx_idx]; @@ -393,15 +375,6 @@ static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) /* Enable DMA reads from the device */ wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME); - - /* Force version to 1 if we successfully requested an update - * from the firmware. This should be overwritten once we get - * the actual version from the firmware in the capabilities - * request message. - */ - if (!fbnic_fw_xmit_cap_msg(fbd) && - !fbd->fw_cap.running.mgmt.version) - fbd->fw_cap.running.mgmt.version = 1; break; } } @@ -907,6 +880,7 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) { unsigned long timeout = jiffies + 10 * HZ + 1; struct fbnic_fw_mbx *tx_mbx; + int err; tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; while (!tx_mbx->ready) { @@ -928,7 +902,22 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) fbnic_mbx_poll(fbd); } + /* Request an update from the firmware. This should overwrite + * mgmt.version once we get the actual version from the firmware + * in the capabilities request message. + */ + err = fbnic_fw_xmit_simple_msg(fbd, FBNIC_TLV_MSG_ID_HOST_CAP_REQ); + if (err) + goto clean_mbx; + + /* Use "1" to indicate we entered the state waiting for a response */ + fbd->fw_cap.running.mgmt.version = 1; + return 0; +clean_mbx: + /* Cleanup Rx buffers and disable mailbox */ + fbnic_mbx_clean(fbd); + return err; } static void __fbnic_fw_evict_cmpl(struct fbnic_fw_completion *cmpl_data) From ce2fa1dba204c761582674cf2eb9cbe0b949b5c7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 09:00:25 -0700 Subject: [PATCH 525/559] fbnic: Do not allow mailbox to toggle to ready outside fbnic_mbx_poll_tx_ready We had originally thought to have the mailbox go to ready in the background while we were doing other things. One issue with this though is that we can't disable it by clearing the ready state without also blocking interrupts or calls to mbx_poll as it will just pop back to life during an interrupt. In order to prevent that from happening we can pull the code for toggling to ready out of the interrupt path and instead place it in the fbnic_mbx_poll_tx_ready path so that it becomes the only spot where the Rx/Tx can toggle to the ready state. By doing this we can prevent races where we disable the DMA and/or free buffers only to have an interrupt fire and undo what we have done. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654722518.499179.11612865740376848478.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 27 ++++++++-------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 90a45c701543..3d9636a6c968 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -356,10 +356,6 @@ static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { struct fbnic_fw_mbx *mbx = &fbd->mbx[mbx_idx]; - /* This is a one time init, so just exit if it is completed */ - if (mbx->ready) - return; - mbx->ready = true; switch (mbx_idx) { @@ -379,21 +375,18 @@ static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) } } -static void fbnic_mbx_postinit(struct fbnic_dev *fbd) +static bool fbnic_mbx_event(struct fbnic_dev *fbd) { - int i; - /* We only need to do this on the first interrupt following reset. * this primes the mailbox so that we will have cleared all the * skip descriptors. */ if (!(rd32(fbd, FBNIC_INTR_STATUS(0)) & (1u << FBNIC_FW_MSIX_ENTRY))) - return; + return false; wr32(fbd, FBNIC_INTR_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); - for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) - fbnic_mbx_init_desc_ring(fbd, i); + return true; } /** @@ -870,7 +863,7 @@ next_page: void fbnic_mbx_poll(struct fbnic_dev *fbd) { - fbnic_mbx_postinit(fbd); + fbnic_mbx_event(fbd); fbnic_mbx_process_tx_msgs(fbd); fbnic_mbx_process_rx_msgs(fbd); @@ -879,11 +872,9 @@ void fbnic_mbx_poll(struct fbnic_dev *fbd) int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) { unsigned long timeout = jiffies + 10 * HZ + 1; - struct fbnic_fw_mbx *tx_mbx; - int err; + int err, i; - tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; - while (!tx_mbx->ready) { + do { if (!time_is_after_jiffies(timeout)) return -ETIMEDOUT; @@ -898,9 +889,11 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) return -ENODEV; msleep(20); + } while (!fbnic_mbx_event(fbd)); - fbnic_mbx_poll(fbd); - } + /* FW has shown signs of life. Enable DMA and start Tx/Rx */ + for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) + fbnic_mbx_init_desc_ring(fbd, i); /* Request an update from the firmware. This should overwrite * mgmt.version once we get the actual version from the firmware From 23fa6a23d97182d36ca3c71e43c804fa91e46a03 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 May 2025 17:32:20 -0700 Subject: [PATCH 526/559] net: export a helper for adding up queue stats Older drivers and drivers with lower queue counts often have a static array of queues, rather than allocating structs for each queue on demand. Add a helper for adding up qstats from a queue range. Expectation is that driver will pass a queue range [netdev->real_num_*x_queues, MAX). It was tempting to always use num_*x_queues as the end, but virtio seems to clamp its queue count after allocating the netdev. And this way we can trivaly reuse the helper for [0, real_..). Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20250507003221.823267-2-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/net/netdev_queues.h | 6 ++++ net/core/netdev-genl.c | 69 +++++++++++++++++++++++++++---------- 2 files changed, 56 insertions(+), 19 deletions(-) diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 825141d675e5..d01c82983b4d 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -103,6 +103,12 @@ struct netdev_stat_ops { struct netdev_queue_stats_tx *tx); }; +void netdev_stat_queue_sum(struct net_device *netdev, + int rx_start, int rx_end, + struct netdev_queue_stats_rx *rx_sum, + int tx_start, int tx_end, + struct netdev_queue_stats_tx *tx_sum); + /** * struct netdev_queue_mgmt_ops - netdev ops for queue management * diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 230743bdbb14..dae9f0d432fb 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -708,25 +708,66 @@ netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp, return 0; } +/** + * netdev_stat_queue_sum() - add up queue stats from range of queues + * @netdev: net_device + * @rx_start: index of the first Rx queue to query + * @rx_end: index after the last Rx queue (first *not* to query) + * @rx_sum: output Rx stats, should be already initialized + * @tx_start: index of the first Tx queue to query + * @tx_end: index after the last Tx queue (first *not* to query) + * @tx_sum: output Tx stats, should be already initialized + * + * Add stats from [start, end) range of queue IDs to *x_sum structs. + * The sum structs must be already initialized. Usually this + * helper is invoked from the .get_base_stats callbacks of drivers + * to account for stats of disabled queues. In that case the ranges + * are usually [netdev->real_num_*x_queues, netdev->num_*x_queues). + */ +void netdev_stat_queue_sum(struct net_device *netdev, + int rx_start, int rx_end, + struct netdev_queue_stats_rx *rx_sum, + int tx_start, int tx_end, + struct netdev_queue_stats_tx *tx_sum) +{ + const struct netdev_stat_ops *ops; + struct netdev_queue_stats_rx rx; + struct netdev_queue_stats_tx tx; + int i; + + ops = netdev->stat_ops; + + for (i = rx_start; i < rx_end; i++) { + memset(&rx, 0xff, sizeof(rx)); + if (ops->get_queue_stats_rx) + ops->get_queue_stats_rx(netdev, i, &rx); + netdev_nl_stats_add(rx_sum, &rx, sizeof(rx)); + } + for (i = tx_start; i < tx_end; i++) { + memset(&tx, 0xff, sizeof(tx)); + if (ops->get_queue_stats_tx) + ops->get_queue_stats_tx(netdev, i, &tx); + netdev_nl_stats_add(tx_sum, &tx, sizeof(tx)); + } +} +EXPORT_SYMBOL(netdev_stat_queue_sum); + static int netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { - struct netdev_queue_stats_rx rx_sum, rx; - struct netdev_queue_stats_tx tx_sum, tx; - const struct netdev_stat_ops *ops; + struct netdev_queue_stats_rx rx_sum; + struct netdev_queue_stats_tx tx_sum; void *hdr; - int i; - ops = netdev->stat_ops; /* Netdev can't guarantee any complete counters */ - if (!ops->get_base_stats) + if (!netdev->stat_ops->get_base_stats) return 0; memset(&rx_sum, 0xff, sizeof(rx_sum)); memset(&tx_sum, 0xff, sizeof(tx_sum)); - ops->get_base_stats(netdev, &rx_sum, &tx_sum); + netdev->stat_ops->get_base_stats(netdev, &rx_sum, &tx_sum); /* The op was there, but nothing reported, don't bother */ if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) && @@ -739,18 +780,8 @@ netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex)) goto nla_put_failure; - for (i = 0; i < netdev->real_num_rx_queues; i++) { - memset(&rx, 0xff, sizeof(rx)); - if (ops->get_queue_stats_rx) - ops->get_queue_stats_rx(netdev, i, &rx); - netdev_nl_stats_add(&rx_sum, &rx, sizeof(rx)); - } - for (i = 0; i < netdev->real_num_tx_queues; i++) { - memset(&tx, 0xff, sizeof(tx)); - if (ops->get_queue_stats_tx) - ops->get_queue_stats_tx(netdev, i, &tx); - netdev_nl_stats_add(&tx_sum, &tx, sizeof(tx)); - } + netdev_stat_queue_sum(netdev, 0, netdev->real_num_rx_queues, &rx_sum, + 0, netdev->real_num_tx_queues, &tx_sum); if (netdev_nl_stats_write_rx(rsp, &rx_sum) || netdev_nl_stats_write_tx(rsp, &tx_sum)) From 001160ec8c59115efc39e197d40829bdafd4d7f5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 May 2025 17:32:21 -0700 Subject: [PATCH 527/559] virtio-net: fix total qstat values NIPA tests report that the interface statistics reported via qstat are lower than those reported via ip link. Looks like this is because some tests flip the queue count up and down, and we end up with some of the traffic accounted on disabled queues. Add up counters from disabled queues. Fixes: d888f04c09bb ("virtio-net: support queue stat") Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20250507003221.823267-3-kuba@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/virtio_net.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f9e3e628ec4d..e53ba600605a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -5678,6 +5678,10 @@ static void virtnet_get_base_stats(struct net_device *dev, if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) tx->hw_drop_ratelimits = 0; + + netdev_stat_queue_sum(dev, + dev->real_num_rx_queues, vi->max_queue_pairs, rx, + dev->real_num_tx_queues, vi->max_queue_pairs, tx); } static const struct netdev_stat_ops virtnet_stat_ops = { From 10aba126bc86904e2f5afeaa26354e877a593c95 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Tue, 6 May 2025 01:15:11 +0200 Subject: [PATCH 528/559] MAINTAINERS: Remove entry for Seth Heasley Seth's mails bounce back, remove his maintainership. Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250505231511.3175151-1-andi.shyti@kernel.org --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 69511c3b2b76..70229ed0c55a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11235,7 +11235,6 @@ S: Maintained F: drivers/i2c/busses/i2c-cht-wc.c I2C/SMBUS ISMT DRIVER -M: Seth Heasley M: Neil Horman L: linux-i2c@vger.kernel.org F: Documentation/i2c/busses/i2c-ismt.rst From fd94de9f9e7aac11ec659e386b9db1203d502023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Tue, 22 Apr 2025 18:23:08 +0200 Subject: [PATCH 529/559] riscv: misaligned: factorize trap handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since both load/store and user/kernel should use almost the same path and that we are going to add some code around that, factorize it. Signed-off-by: Clément Léger Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250422162324.956065-2-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/traps.c | 74 +++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 8ff8e8b36524..b1d991c78a23 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -198,47 +198,53 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re DO_ERROR_INFO(do_trap_load_fault, SIGSEGV, SEGV_ACCERR, "load access fault"); +enum misaligned_access_type { + MISALIGNED_STORE, + MISALIGNED_LOAD, +}; +static const struct { + const char *type_str; + int (*handler)(struct pt_regs *regs); +} misaligned_handler[] = { + [MISALIGNED_STORE] = { + .type_str = "Oops - store (or AMO) address misaligned", + .handler = handle_misaligned_store, + }, + [MISALIGNED_LOAD] = { + .type_str = "Oops - load address misaligned", + .handler = handle_misaligned_load, + }, +}; + +static void do_trap_misaligned(struct pt_regs *regs, enum misaligned_access_type type) +{ + irqentry_state_t state; + + if (user_mode(regs)) + irqentry_enter_from_user_mode(regs); + else + state = irqentry_nmi_enter(regs); + + if (misaligned_handler[type].handler(regs)) + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + misaligned_handler[type].type_str); + + if (user_mode(regs)) + irqentry_exit_to_user_mode(regs); + else + irqentry_nmi_exit(regs, state); +} + asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) { - if (user_mode(regs)) { - irqentry_enter_from_user_mode(regs); - - if (handle_misaligned_load(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - load address misaligned"); - - irqentry_exit_to_user_mode(regs); - } else { - irqentry_state_t state = irqentry_nmi_enter(regs); - - if (handle_misaligned_load(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - load address misaligned"); - - irqentry_nmi_exit(regs, state); - } + do_trap_misaligned(regs, MISALIGNED_LOAD); } asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs *regs) { - if (user_mode(regs)) { - irqentry_enter_from_user_mode(regs); - - if (handle_misaligned_store(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - store (or AMO) address misaligned"); - - irqentry_exit_to_user_mode(regs); - } else { - irqentry_state_t state = irqentry_nmi_enter(regs); - - if (handle_misaligned_store(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - store (or AMO) address misaligned"); - - irqentry_nmi_exit(regs, state); - } + do_trap_misaligned(regs, MISALIGNED_STORE); } + DO_ERROR_INFO(do_trap_store_fault, SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault"); DO_ERROR_INFO(do_trap_ecall_s, From 453805f0a28fc5091e46145e6560c776f7c7a611 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Tue, 22 Apr 2025 18:23:09 +0200 Subject: [PATCH 530/559] riscv: misaligned: enable IRQs while handling misaligned accesses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can safely reenable IRQs if coming from userspace. This allows to access user memory that could potentially trigger a page fault. Fixes: b686ecdeacf6 ("riscv: misaligned: Restrict user access to kernel memory") Signed-off-by: Clément Léger Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250422162324.956065-3-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/traps.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index b1d991c78a23..9c83848797a7 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -220,19 +220,23 @@ static void do_trap_misaligned(struct pt_regs *regs, enum misaligned_access_type { irqentry_state_t state; - if (user_mode(regs)) + if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); - else + local_irq_enable(); + } else { state = irqentry_nmi_enter(regs); + } if (misaligned_handler[type].handler(regs)) do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, misaligned_handler[type].type_str); - if (user_mode(regs)) + if (user_mode(regs)) { + local_irq_disable(); irqentry_exit_to_user_mode(regs); - else + } else { irqentry_nmi_exit(regs, state); + } } asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) From 897e8aece3c8a1a66b3eae58df1832a524d45319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Tue, 22 Apr 2025 18:23:10 +0200 Subject: [PATCH 531/559] riscv: misaligned: use get_user() instead of __get_user() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we can safely handle user memory accesses while in the misaligned access handlers, use get_user() instead of __get_user() to have user memory access checks. Signed-off-by: Clément Léger Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250422162324.956065-4-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/traps_misaligned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index dde5d11dc1b5..77c788660223 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -275,7 +275,7 @@ static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, int __ret; \ \ if (user_mode(regs)) { \ - __ret = __get_user(insn, (type __user *) insn_addr); \ + __ret = get_user(insn, (type __user *) insn_addr); \ } else { \ insn = *(type *)insn_addr; \ __ret = 0; \ From ae08d55807c099357c047dba17624b09414635dd Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Sun, 4 May 2025 12:19:20 +0200 Subject: [PATCH 532/559] riscv: Fix kernel crash due to PR_SET_TAGGED_ADDR_CTRL When userspace does PR_SET_TAGGED_ADDR_CTRL, but Supm extension is not available, the kernel crashes: Oops - illegal instruction [#1] [snip] epc : set_tagged_addr_ctrl+0x112/0x15a ra : set_tagged_addr_ctrl+0x74/0x15a epc : ffffffff80011ace ra : ffffffff80011a30 sp : ffffffc60039be10 [snip] status: 0000000200000120 badaddr: 0000000010a79073 cause: 0000000000000002 set_tagged_addr_ctrl+0x112/0x15a __riscv_sys_prctl+0x352/0x73c do_trap_ecall_u+0x17c/0x20c andle_exception+0x150/0x15c Fix it by checking if Supm is available. Fixes: 09d6775f503b ("riscv: Add support for userspace pointer masking") Signed-off-by: Nam Cao Cc: stable@vger.kernel.org Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20250504101920.3393053-1-namcao@linutronix.de Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 7c244de77180..3db2c0c07acd 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -275,6 +275,9 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) unsigned long pmm; u8 pmlen; + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + return -EINVAL; + if (is_compat_thread(ti)) return -EINVAL; From e9d86b8e17e725ee6088970981ab99f29abd1997 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 8 Apr 2025 09:28:51 +0200 Subject: [PATCH 533/559] scripts: Do not strip .rela.dyn section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .rela.dyn section contains runtime relocations and is only emitted for a relocatable kernel. riscv uses this section to relocate the kernel at runtime but that section is stripped from vmlinux. That prevents kexec to successfully load vmlinux since it does not contain the relocations info needed. Fixes: 559d1e45a16d ("riscv: Use --emit-relocs in order to move .rela.dyn in init") Tested-by: Björn Töpel Reviewed-by: Björn Töpel Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250408072851.90275-1-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti --- scripts/Makefile.vmlinux | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index b0a6cd5b818c..85d60d986401 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -13,7 +13,7 @@ ifdef CONFIG_ARCH_VMLINUX_NEEDS_RELOCS vmlinux-final := vmlinux.unstripped quiet_cmd_strip_relocs = RSTRIP $@ - cmd_strip_relocs = $(OBJCOPY) --remove-section='.rel*' $< $@ + cmd_strip_relocs = $(OBJCOPY) --remove-section='.rel*' --remove-section=!'.rel*.dyn' $< $@ vmlinux: $(vmlinux-final) FORCE $(call if_changed,strip_relocs) From 7f1c3de1370bc6a8ad5157336b258067dac0ae9c Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 7 May 2025 07:52:18 -0700 Subject: [PATCH 534/559] riscv: Disallow PR_GET_TAGGED_ADDR_CTRL without Supm When the prctl() interface for pointer masking was added, it did not check that the pointer masking ISA extension was supported, only the individual submodes. Userspace could still attempt to disable pointer masking and query the pointer masking state. commit 81de1afb2dd1 ("riscv: Fix kernel crash due to PR_SET_TAGGED_ADDR_CTRL") disallowed the former, as the senvcfg write could crash on older systems. PR_GET_TAGGED_ADDR_CTRL state does not crash, because it reads only kernel-internal state and not senvcfg, but it should still be disallowed for consistency. Fixes: 09d6775f503b ("riscv: Add support for userspace pointer masking") Signed-off-by: Samuel Holland Reviewed-by: Nam Cao Link: https://lore.kernel.org/r/20250507145230.2272871-1-samuel.holland@sifive.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 3db2c0c07acd..15d8f75902f8 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -333,6 +333,9 @@ long get_tagged_addr_ctrl(struct task_struct *task) struct thread_info *ti = task_thread_info(task); long ret = 0; + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + return -EINVAL; + if (is_compat_thread(ti)) return -EINVAL; From e3417ab75ab2e7dca6372a1bfa26b1be3ac5889e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 5 May 2025 11:03:00 -0700 Subject: [PATCH 535/559] KVM: SVM: Set/clear SRSO's BP_SPEC_REDUCE on 0 <=> 1 VM count transitions Set the magic BP_SPEC_REDUCE bit to mitigate SRSO when running VMs if and only if KVM has at least one active VM. Leaving the bit set at all times unfortunately degrades performance by a wee bit more than expected. Use a dedicated spinlock and counter instead of hooking virtualization enablement, as changing the behavior of kvm.enable_virt_at_load based on SRSO_BP_SPEC_REDUCE is painful, and has its own drawbacks, e.g. could result in performance issues for flows that are sensitive to VM creation latency. Defer setting BP_SPEC_REDUCE until VMRUN is imminent to avoid impacting performance on CPUs that aren't running VMs, e.g. if a setup is using housekeeping CPUs. Setting BP_SPEC_REDUCE in task context, i.e. without blasting IPIs to all CPUs, also helps avoid serializing 1<=>N transitions without incurring a gross amount of complexity (see the Link for details on how ugly coordinating via IPIs gets). Link: https://lore.kernel.org/all/aBOnzNCngyS_pQIW@google.com Fixes: 8442df2b49ed ("x86/bugs: KVM: Add support for SRSO_MSR_FIX") Reported-by: Michael Larabel Closes: https://www.phoronix.com/review/linux-615-amd-regression Cc: Borislav Petkov Tested-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20250505180300.973137-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 71 ++++++++++++++++++++++++++++++++++++++---- arch/x86/kvm/svm/svm.h | 2 ++ 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c5470d842aed..a89c271a1951 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -607,9 +607,6 @@ static void svm_disable_virtualization_cpu(void) kvm_cpu_svm_disable(); amd_pmu_disable_virt(); - - if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) - msr_clear_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); } static int svm_enable_virtualization_cpu(void) @@ -687,9 +684,6 @@ static int svm_enable_virtualization_cpu(void) rdmsr(MSR_TSC_AUX, sev_es_host_save_area(sd)->tsc_aux, msr_hi); } - if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) - msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); - return 0; } @@ -1518,6 +1512,63 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE)); } +#ifdef CONFIG_CPU_MITIGATIONS +static DEFINE_SPINLOCK(srso_lock); +static atomic_t srso_nr_vms; + +static void svm_srso_clear_bp_spec_reduce(void *ign) +{ + struct svm_cpu_data *sd = this_cpu_ptr(&svm_data); + + if (!sd->bp_spec_reduce_set) + return; + + msr_clear_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); + sd->bp_spec_reduce_set = false; +} + +static void svm_srso_vm_destroy(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) + return; + + if (atomic_dec_return(&srso_nr_vms)) + return; + + guard(spinlock)(&srso_lock); + + /* + * Verify a new VM didn't come along, acquire the lock, and increment + * the count before this task acquired the lock. + */ + if (atomic_read(&srso_nr_vms)) + return; + + on_each_cpu(svm_srso_clear_bp_spec_reduce, NULL, 1); +} + +static void svm_srso_vm_init(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) + return; + + /* + * Acquire the lock on 0 => 1 transitions to ensure a potential 1 => 0 + * transition, i.e. destroying the last VM, is fully complete, e.g. so + * that a delayed IPI doesn't clear BP_SPEC_REDUCE after a vCPU runs. + */ + if (atomic_inc_not_zero(&srso_nr_vms)) + return; + + guard(spinlock)(&srso_lock); + + atomic_inc(&srso_nr_vms); +} +#else +static void svm_srso_vm_init(void) { } +static void svm_srso_vm_destroy(void) { } +#endif + static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1550,6 +1601,11 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm))) kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull); + if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE) && + !sd->bp_spec_reduce_set) { + sd->bp_spec_reduce_set = true; + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); + } svm->guest_state_loaded = true; } @@ -5040,6 +5096,8 @@ static void svm_vm_destroy(struct kvm *kvm) { avic_vm_destroy(kvm); sev_vm_destroy(kvm); + + svm_srso_vm_destroy(); } static int svm_vm_init(struct kvm *kvm) @@ -5065,6 +5123,7 @@ static int svm_vm_init(struct kvm *kvm) return ret; } + svm_srso_vm_init(); return 0; } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index d4490eaed55d..f16b068c4228 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -335,6 +335,8 @@ struct svm_cpu_data { u32 next_asid; u32 min_asid; + bool bp_spec_reduce_set; + struct vmcb *save_area; unsigned long save_area_pa; From c0d0a9ff6d5b5b23ddabde8bcbafb28fa454ae00 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Thu, 8 May 2025 16:30:36 +0800 Subject: [PATCH 536/559] block: remove test of incorrect io priority level Ever since commit eca2040972b4("scsi: block: ioprio: Clean up interface definition"), the macro IOPRIO_PRIO_LEVEL() will mask the level value to something between 0 and 7 so necessarily, level will always be lower than IOPRIO_NR_LEVELS(8). Remove this obsolete check. Reported-by: Kexin Wei Cc: Damien Le Moal Signed-off-by: Aaron Lu Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20250508083018.GA769554@bytedance Signed-off-by: Jens Axboe --- block/ioprio.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/block/ioprio.c b/block/ioprio.c index 73301a261429..f0ee2798539c 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -46,12 +46,8 @@ int ioprio_check_cap(int ioprio) */ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE)) return -EPERM; - fallthrough; - /* rt has prio field too */ - case IOPRIO_CLASS_BE: - if (level >= IOPRIO_NR_LEVELS) - return -EINVAL; break; + case IOPRIO_CLASS_BE: case IOPRIO_CLASS_IDLE: break; case IOPRIO_CLASS_NONE: From 0e33e0f339b91eecd9558311449a3d1e728722d4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:46:56 -0400 Subject: [PATCH 537/559] drm/amdgpu/hdp5: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: cf424020e040 ("drm/amdgpu/hdp5.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit a5cb344033c7598762e89255e8ff52827abb57a4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 43195c079748..086a647308df 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -32,7 +32,12 @@ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From dbc988c689333faeeed44d5561f372ff20395304 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:47:37 -0400 Subject: [PATCH 538/559] drm/amdgpu/hdp5.2: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: f756dbac1ce1 ("drm/amdgpu/hdp5.2: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 4a89b7698e771914b4d5b571600c76e2fdcbe2a9) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index fcb8dd2876bc..40940b4ab400 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -33,7 +33,17 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, if (!ring || !ring->funcs->emit_wreg) { WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + if (amdgpu_sriov_vf(adev)) { + /* this is fine because SR_IOV doesn't remap the register */ + RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); + } } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, From ca28e80abe4219c8f1a2961ae05102d70af6dc87 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:48:51 -0400 Subject: [PATCH 539/559] drm/amdgpu/hdp6: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: abe1cbaec6cf ("drm/amdgpu/hdp6.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 84141ff615951359c9a99696fd79a36c465ed847) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index a88d25a06c29..6ccd31c8bc69 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -35,7 +35,12 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From 5a11a2767731139bf87e667331aa2209e33a1d19 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:50:02 -0400 Subject: [PATCH 540/559] drm/amdgpu/hdp7: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: 689275140cb8 ("drm/amdgpu/hdp7.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit dbc064adfcf9095e7d895bea87b2f75c1ab23236) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 49f7eb4fbd11..2c9239a22f39 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -32,7 +32,12 @@ static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From 391008f34e711253c5983b0bf52277cc43723127 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 8 Apr 2025 08:59:15 -0700 Subject: [PATCH 541/559] drm/xe: Add page queue multiplier For an unknown reason the math to determine the PF queue size does is not correct - compute UMD applications are overflowing the PF queue which is fatal. A multippier of 8 fixes the problem. Fixes: 3338e4f90c14 ("drm/xe: Use topology to determine page fault queue size") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Jagmeet Randhawa Link: https://lore.kernel.org/r/20250408155915.78770-1-matthew.brost@intel.com (cherry picked from commit 29582e0ea75c95668d168b12406e3c56cf5a73c4) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index c5ad9a0a89c2..0c22b3a36655 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -435,9 +435,16 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, XE_MAX_EU_FUSE_BITS) * num_dss; - /* user can issue separate page faults per EU and per CS */ + /* + * user can issue separate page faults per EU and per CS + * + * XXX: Multiplier required as compute UMD are getting PF queue errors + * without it. Follow on why this multiplier is required. + */ +#define PF_MULTIPLIER 8 pf_queue->num_dw = - (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW; + (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; +#undef PF_MULTIPLIER pf_queue->gt = gt; pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw, From 51c0ee84e4dc339287b2d7335f2b54d747794c83 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 28 Apr 2025 13:53:57 +0530 Subject: [PATCH 542/559] drm/xe/tests/mocs: Hold XE_FORCEWAKE_ALL for LNCF regs LNCF registers report wrong values when XE_FORCEWAKE_GT only is held. Holding XE_FORCEWAKE_ALL ensures correct operations on LNCF regs. V2(Himal): - Use xe_force_wake_ref_has_domain Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1999 Fixes: a6a4ea6d7d37 ("drm/xe: Add mocs kunit") Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20250428082357.1730068-1-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay (cherry picked from commit 70a2585e582058e94fe4381a337be42dec800337) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/tests/xe_mocs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index ef1e5256c56a..0e502feaca81 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -46,8 +46,11 @@ static void read_l3cc_table(struct xe_gt *gt, unsigned int fw_ref, i; u32 reg_val; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n"); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + xe_force_wake_put(gt_to_fw(gt), fw_ref); + KUNIT_ASSERT_TRUE_MSG(test, true, "Forcewake Failed.\n"); + } for (i = 0; i < info->num_mocs_regs; i++) { if (!(i & 1)) { From 03552d8ac0afcc080c339faa0b726e2c0e9361cb Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 2 May 2025 08:51:04 -0700 Subject: [PATCH 543/559] drm/xe/gsc: do not flush the GSC worker from the reset path The workqueue used for the reset worker is marked as WQ_MEM_RECLAIM, while the GSC one isn't (and can't be as we need to do memory allocations in the gsc worker). Therefore, we can't flush the latter from the former. The reason why we had such a flush was to avoid interrupting either the GSC FW load or in progress GSC proxy operations. GSC proxy operations fall into 2 categories: 1) GSC proxy init: this only happens once immediately after GSC FW load and does not support being interrupted. The only way to recover from an interruption of the proxy init is to do an FLR and re-load the GSC. 2) GSC proxy request: this can happen in response to a request that the driver sends to the GSC. If this is interrupted, the GSC FW will timeout and the driver request will be failed, but overall the GSC will keep working fine. Flushing the work allowed us to avoid interruption in both cases (unless the hang came from the GSC engine itself, in which case we're toast anyway). However, a failure on a proxy request is tolerable if we're in a scenario where we're triggering a GT reset (i.e., something is already gone pretty wrong), so what we really need to avoid is interrupting the init flow, which we can do by polling on the register that reports when the proxy init is complete (as that ensure us that all the load and init operations have been completed). Note that during suspend we still want to do a flush of the worker to make sure it completes any operations involving the HW before the power is cut. v2: fix spelling in commit msg, rename waiter function (Julia) Fixes: dd0e89e5edc2 ("drm/xe/gsc: GSC FW load") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4830 Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Alan Previn Cc: # v6.8+ Reviewed-by: Julia Filipchuk Link: https://lore.kernel.org/r/20250502155104.2201469-1-daniele.ceraolospurio@intel.com (cherry picked from commit 12370bfcc4f0bdf70279ec5b570eb298963422b5) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gsc.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gsc.h | 1 + drivers/gpu/drm/xe/xe_gsc_proxy.c | 11 +++++++++++ drivers/gpu/drm/xe/xe_gsc_proxy.h | 1 + drivers/gpu/drm/xe/xe_gt.c | 2 +- drivers/gpu/drm/xe/xe_uc.c | 8 +++++++- drivers/gpu/drm/xe/xe_uc.h | 1 + 7 files changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index fd41113f8572..0bcf97063ff6 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -555,6 +555,28 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) flush_work(&gsc->work); } +void xe_gsc_stop_prepare(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + int ret; + + if (!xe_uc_fw_is_loadable(&gsc->fw) || xe_uc_fw_is_in_error_state(&gsc->fw)) + return; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GSC); + + /* + * If the GSC FW load or the proxy init are interrupted, the only way + * to recover it is to do an FLR and reload the GSC from scratch. + * Therefore, let's wait for the init to complete before stopping + * operations. The proxy init is the last step, so we can just wait on + * that + */ + ret = xe_gsc_wait_for_proxy_init_done(gsc); + if (ret) + xe_gt_err(gt, "failed to wait for GSC init completion before uc stop\n"); +} + /* * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a * GSC engine reset by writing a notification bit in the GS1 register and then diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index d99f66c38075..b8b8e0810ad9 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -16,6 +16,7 @@ struct xe_hw_engine; int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); +void xe_gsc_stop_prepare(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 8cf70b228ff3..d0519cd6704a 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -71,6 +71,17 @@ bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) HECI1_FWSTS1_PROXY_STATE_NORMAL; } +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + + /* Proxy init can take up to 500ms, so wait double that for safety */ + return xe_mmio_wait32(>->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE), + HECI1_FWSTS1_CURRENT_STATE, + HECI1_FWSTS1_PROXY_STATE_NORMAL, + USEC_PER_SEC, NULL, false); +} + static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set) { struct xe_gt *gt = gsc_to_gt(gsc); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h index fdef56995cd4..765602221dbc 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.h +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h @@ -12,6 +12,7 @@ struct xe_gsc; int xe_gsc_proxy_init(struct xe_gsc *gsc); bool xe_gsc_proxy_init_done(struct xe_gsc *gsc); +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc); int xe_gsc_proxy_start(struct xe_gsc *gsc); int xe_gsc_proxy_request_handler(struct xe_gsc *gsc); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 10a9e3c72b36..66198cf2662c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -857,7 +857,7 @@ void xe_gt_suspend_prepare(struct xe_gt *gt) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_uc_stop_prepare(>->uc); + xe_uc_suspend_prepare(>->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index c14bd2282044..3a8751a8b92d 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -244,7 +244,7 @@ void xe_uc_gucrc_disable(struct xe_uc *uc) void xe_uc_stop_prepare(struct xe_uc *uc) { - xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_gsc_stop_prepare(&uc->gsc); xe_guc_stop_prepare(&uc->guc); } @@ -278,6 +278,12 @@ again: goto again; } +void xe_uc_suspend_prepare(struct xe_uc *uc) +{ + xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_guc_stop_prepare(&uc->guc); +} + int xe_uc_suspend(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 3813c1ede450..c23e6f5e2514 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -18,6 +18,7 @@ int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); void xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); +void xe_uc_suspend_prepare(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); void xe_uc_declare_wedged(struct xe_uc *uc); From 9d271a4f5ba52520e448ab223b1a91c6e35f17c7 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 7 May 2025 02:23:02 +0000 Subject: [PATCH 544/559] drm/xe: Release force wake first then runtime power xe_force_wake_get() is dependent on xe_pm_runtime_get(), so for the release path, xe_force_wake_put() should be called first then xe_pm_runtime_put(). Combine the error path and normal path together with goto. Fixes: 85d547608ef5 ("drm/xe/xe_gt_debugfs: Update handling of xe_force_wake_get return") Cc: Himal Prasad Ghimiray Cc: Rodrigo Vivi Signed-off-by: Shuicheng Lin Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20250507022302.2187527-1-shuicheng.lin@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 432cd94efdca06296cc5e76d673546f58aa90ee1) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 2d63a69cbfa3..f7005a3643e6 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -92,22 +92,23 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p) struct xe_hw_engine *hwe; enum xe_hw_engine_id id; unsigned int fw_ref; + int ret = 0; xe_pm_runtime_get(xe); fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - xe_pm_runtime_put(xe); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto fw_put; } for_each_hw_engine(hwe, gt, id) xe_hw_engine_print(hwe, p); +fw_put: xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); - return 0; + return ret; } static int powergate_info(struct xe_gt *gt, struct drm_printer *p) From 564467e9d06c6352fac9100e8957d40a1a50234c Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 2 May 2025 17:00:52 +0000 Subject: [PATCH 545/559] drm/xe: Add config control for svm flush work Without CONFIG_DRM_XE_GPUSVM set, GPU SVM is not initialized thus below warning pops. Refine the flush work code to be controlled by the config to avoid below warning: " [ 453.132028] ------------[ cut here ]------------ [ 453.132527] WARNING: CPU: 9 PID: 4491 at kernel/workqueue.c:4205 __flush_work+0x379/0x3a0 [ 453.133355] Modules linked in: xe drm_ttm_helper ttm gpu_sched drm_buddy drm_suballoc_helper drm_gpuvm drm_exec [ 453.134352] CPU: 9 UID: 0 PID: 4491 Comm: xe_exec_mix_mod Tainted: G U W 6.15.0-rc3+ #7 PREEMPT(full) [ 453.135405] Tainted: [U]=USER, [W]=WARN ... [ 453.136921] RIP: 0010:__flush_work+0x379/0x3a0 [ 453.137417] Code: 8b 45 00 48 8b 55 08 89 c7 48 c1 e8 04 83 e7 08 83 e0 0f 83 cf 02 89 c6 48 0f ba 6d 00 03 e9 d5 fe ff ff 0f 0b e9 db fd ff ff <0f> 0b 45 31 e4 e9 d1 fd ff ff 0f 0b e9 03 ff ff ff 0f 0b e9 d6 fe [ 453.139250] RSP: 0018:ffffc90000c67b18 EFLAGS: 00010246 [ 453.139782] RAX: 0000000000000000 RBX: ffff888108a24000 RCX: 0000000000002000 [ 453.140521] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff8881016d61c8 [ 453.141253] RBP: ffff8881016d61c8 R08: 0000000000000000 R09: 0000000000000000 [ 453.141985] R10: 0000000000000000 R11: 0000000008a24000 R12: 0000000000000001 [ 453.142709] R13: 0000000000000002 R14: 0000000000000000 R15: ffff888107db8c00 [ 453.143450] FS: 00007f44853d4c80(0000) GS:ffff8882f469b000(0000) knlGS:0000000000000000 [ 453.144276] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 453.144853] CR2: 00007f4487629228 CR3: 00000001016aa000 CR4: 00000000000406f0 [ 453.145594] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 453.146320] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 453.147061] Call Trace: [ 453.147336] [ 453.147579] ? tick_nohz_tick_stopped+0xd/0x30 [ 453.148067] ? xas_load+0x9/0xb0 [ 453.148435] ? xa_load+0x6f/0xb0 [ 453.148781] __xe_vm_bind_ioctl+0xbd5/0x1500 [xe] [ 453.149338] ? dev_printk_emit+0x48/0x70 [ 453.149762] ? _dev_printk+0x57/0x80 [ 453.150148] ? drm_ioctl+0x17c/0x440 [ 453.150544] ? __drm_dev_vprintk+0x36/0x90 [ 453.150983] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 453.151575] ? drm_ioctl_kernel+0x9f/0xf0 [ 453.151998] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 453.152560] drm_ioctl_kernel+0x9f/0xf0 [ 453.152968] drm_ioctl+0x20f/0x440 [ 453.153332] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 453.153893] ? ioctl_has_perm.constprop.0.isra.0+0xae/0x100 [ 453.154489] ? memory_bm_test_bit+0x5/0x60 [ 453.154935] xe_drm_ioctl+0x47/0x70 [xe] [ 453.155419] __x64_sys_ioctl+0x8d/0xc0 [ 453.155824] do_syscall_64+0x47/0x110 [ 453.156228] entry_SYSCALL_64_after_hwframe+0x76/0x7e " v2 (Matt): refine commit message to have more details add Fixes tag move the code to xe_svm.h which already have the config remove a blank line per codestyle suggestion Fixes: 63f6e480d115 ("drm/xe: Add SVM garbage collector") Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250502170052.1787973-1-shuicheng.lin@intel.com (cherry picked from commit 9d80698bcd97a5ad1088bcbb055e73fd068895e2) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 12 ++++++++++++ drivers/gpu/drm/xe/xe_svm.h | 8 ++++++++ drivers/gpu/drm/xe/xe_vm.c | 3 +-- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 0b6547c06961..24c578e1170e 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -947,3 +947,15 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) return 0; } #endif + +/** + * xe_svm_flush() - SVM flush + * @vm: The VM. + * + * Flush all SVM actions. + */ +void xe_svm_flush(struct xe_vm *vm) +{ + if (xe_vm_in_fault_mode(vm)) + flush_work(&vm->svm.garbage_collector.work); +} diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index e059590e5076..be306fe7aaa4 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -72,6 +72,9 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end); int xe_svm_bo_evict(struct xe_bo *bo); void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); + +void xe_svm_flush(struct xe_vm *vm); + #else static inline bool xe_svm_range_pages_valid(struct xe_svm_range *range) { @@ -124,6 +127,11 @@ static inline void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) { } + +static inline void xe_svm_flush(struct xe_vm *vm) +{ +} + #endif /** diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 60303998bd61..367c84b90e9e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3312,8 +3312,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } /* Ensure all UNMAPs visible */ - if (xe_vm_in_fault_mode(vm)) - flush_work(&vm->svm.garbage_collector.work); + xe_svm_flush(vm); err = down_write_killable(&vm->lock); if (err) From da8bf5daa5e55a6af2b285ecda460d6454712ff4 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 8 May 2025 12:24:10 -0500 Subject: [PATCH 546/559] memblock: Accept allocated memory before use in memblock_double_array() When increasing the array size in memblock_double_array() and the slab is not yet available, a call to memblock_find_in_range() is used to reserve/allocate memory. However, the range returned may not have been accepted, which can result in a crash when booting an SNP guest: RIP: 0010:memcpy_orig+0x68/0x130 Code: ... RSP: 0000:ffffffff9cc03ce8 EFLAGS: 00010006 RAX: ff11001ff83e5000 RBX: 0000000000000000 RCX: fffffffffffff000 RDX: 0000000000000bc0 RSI: ffffffff9dba8860 RDI: ff11001ff83e5c00 RBP: 0000000000002000 R08: 0000000000000000 R09: 0000000000002000 R10: 000000207fffe000 R11: 0000040000000000 R12: ffffffff9d06ef78 R13: ff11001ff83e5000 R14: ffffffff9dba7c60 R15: 0000000000000c00 memblock_double_array+0xff/0x310 memblock_add_range+0x1fb/0x2f0 memblock_reserve+0x4f/0xa0 memblock_alloc_range_nid+0xac/0x130 memblock_alloc_internal+0x53/0xc0 memblock_alloc_try_nid+0x3d/0xa0 swiotlb_init_remap+0x149/0x2f0 mem_init+0xb/0xb0 mm_core_init+0x8f/0x350 start_kernel+0x17e/0x5d0 x86_64_start_reservations+0x14/0x30 x86_64_start_kernel+0x92/0xa0 secondary_startup_64_no_verify+0x194/0x19b Mitigate this by calling accept_memory() on the memory range returned before the slab is available. Prior to v6.12, the accept_memory() interface used a 'start' and 'end' parameter instead of 'start' and 'size', therefore the accept_memory() call must be adjusted to specify 'start + size' for 'end' when applying to kernels prior to v6.12. Cc: stable@vger.kernel.org # see patch description, needs adjustments for <= 6.11 Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory") Signed-off-by: Tom Lendacky Link: https://lore.kernel.org/r/da1ac73bf4ded761e21b4e4bb5178382a580cd73.1746725050.git.thomas.lendacky@amd.com Signed-off-by: Mike Rapoport (Microsoft) --- mm/memblock.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index d3509414b8c3..0e9ebb8aa7fe 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -457,7 +457,14 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, min(new_area_start, memblock.current_limit), new_alloc_size, PAGE_SIZE); - new_array = addr ? __va(addr) : NULL; + if (addr) { + /* The memory may not have been accepted, yet. */ + accept_memory(addr, new_alloc_size); + + new_array = __va(addr); + } else { + new_array = NULL; + } } if (!addr) { pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", From 732b87a409667a370b87955c518e5d004de740b5 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 7 May 2025 18:19:53 +0300 Subject: [PATCH 547/559] drm/i915/dp: Fix determining SST/MST mode during MTP TU state computation Determining the SST/MST mode during state computation must be done based on the output type stored in the CRTC state, which in turn is set once based on the modeset connector's SST vs. MST type and will not change as long as the connector is using the CRTC. OTOH the MST mode indicated by the given connector's intel_dp::is_mst flag can change independently of the above output type, based on what sink is at any moment plugged to the connector. Fix the state computation accordingly. Cc: Jani Nikula Fixes: f6971d7427c2 ("drm/i915/mst: adapt intel_dp_mtp_tu_compute_config() for 128b/132b SST") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4607 Reviewed-by: Jani Nikula Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250507151953.251846-1-imre.deak@intel.com (cherry picked from commit 0f45696ddb2b901fbf15cb8d2e89767be481d59f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 02f95108c637..6dc2d31ccb5a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -242,7 +242,7 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, to_intel_connector(conn_state->connector); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - bool is_mst = intel_dp->is_mst; + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); int bpp_x16, slots = -EINVAL; int dsc_slice_count = 0; int max_dpt_bpp_x16; From 92835cebab120f8a5f023a26a792a2ac3f816c4f Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 8 May 2025 14:12:03 -0400 Subject: [PATCH 548/559] io_uring/sqpoll: Increase task_work submission batch size Our QA team reported a 10%-23%, throughput reduction on an io_uring sqpoll testcase doing IO to a null_blk, that I traced back to a reduction of the device submission queue depth utilization. It turns out that, after commit af5d68f8892f ("io_uring/sqpoll: manage task_work privately"), we capped the number of task_work entries that can be completed from a single spin of sqpoll to only 8 entries, before the sqpoll goes around to (potentially) sleep. While this cap doesn't drive the submission side directly, it impacts the completion behavior, which affects the number of IO queued by fio per sqpoll cycle on the submission side, and io_uring ends up seeing less ios per sqpoll cycle. As a result, block layer plugging is less effective, and we see more time spent inside the block layer in profilings charts, and increased submission latency measured by fio. There are other places that have increased overhead once sqpoll sleeps more often, such as the sqpoll utilization calculation. But, in this microbenchmark, those were not representative enough in perf charts, and their removal didn't yield measurable changes in throughput. The major overhead comes from the fact we plug less, and less often, when submitting to the block layer. My benchmark is: fio --ioengine=io_uring --direct=1 --iodepth=128 --runtime=300 --bs=4k \ --invalidate=1 --time_based --ramp_time=10 --group_reporting=1 \ --filename=/dev/nullb0 --name=RandomReads-direct-nullb-sqpoll-4k-1 \ --rw=randread --numjobs=1 --sqthread_poll In one machine, tested on top of Linux 6.15-rc1, we have the following baseline: READ: bw=4994MiB/s (5236MB/s), 4994MiB/s-4994MiB/s (5236MB/s-5236MB/s), io=439GiB (471GB), run=90001-90001msec With this patch: READ: bw=5762MiB/s (6042MB/s), 5762MiB/s-5762MiB/s (6042MB/s-6042MB/s), io=506GiB (544GB), run=90001-90001msec which is a 15% improvement in measured bandwidth. The average submission latency is noticeably lowered too. As measured by fio: Baseline: lat (usec): min=20, max=241, avg=99.81, stdev=3.38 Patched: lat (usec): min=26, max=226, avg=86.48, stdev=4.82 If we look at blktrace, we can also see the plugging behavior is improved. In the baseline, we end up limited to plugging 8 requests in the block layer regardless of the device queue depth size, while after patching we can drive more io, and we manage to utilize the full device queue. In the baseline, after a stabilization phase, an ordinary submission looks like: 254,0 1 49942 0.016028795 5977 U N [iou-sqp-5976] 7 After patching, I see consistently more requests per unplug. 254,0 1 4996 0.001432872 3145 U N [iou-sqp-3144] 32 Ideally, the cap size would at least be the deep enough to fill the device queue, but we can't predict that behavior, or assume all IO goes to a single device, and thus can't guess the ideal batch size. We also don't want to let the tw run unbounded, though I'm not sure it would really be a problem. Instead, let's just give it a more sensible value that will allow for more efficient batching. I've tested with different cap values, and initially proposed to increase the cap to 1024. Jens argued it is too big of a bump and I observed that, with 32, I'm no longer able to observe this bottleneck in any of my machines. Fixes: af5d68f8892f ("io_uring/sqpoll: manage task_work privately") Signed-off-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20250508181203.3785544-1-krisman@suse.de Signed-off-by: Jens Axboe --- io_uring/sqpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index d037cc68e9d3..03c699493b5a 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -20,7 +20,7 @@ #include "sqpoll.h" #define IORING_SQPOLL_CAP_ENTRIES_VALUE 8 -#define IORING_TW_CAP_ENTRIES_VALUE 8 +#define IORING_TW_CAP_ENTRIES_VALUE 32 enum { IO_SQ_THREAD_SHOULD_STOP = 0, From fea4e317f9e7e1f449ce90dedc27a2d2a95bee5a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 8 May 2025 15:41:32 -0700 Subject: [PATCH 549/559] x86/mm: Eliminate window where TLB flushes may be inadvertently skipped tl;dr: There is a window in the mm switching code where the new CR3 is set and the CPU should be getting TLB flushes for the new mm. But should_flush_tlb() has a bug and suppresses the flush. Fix it by widening the window where should_flush_tlb() sends an IPI. Long Version: === History === There were a few things leading up to this. First, updating mm_cpumask() was observed to be too expensive, so it was made lazier. But being lazy caused too many unnecessary IPIs to CPUs due to the now-lazy mm_cpumask(). So code was added to cull mm_cpumask() periodically[2]. But that culling was a bit too aggressive and skipped sending TLB flushes to CPUs that need them. So here we are again. === Problem === The too-aggressive code in should_flush_tlb() strikes in this window: // Turn on IPIs for this CPU/mm combination, but only // if should_flush_tlb() agrees: cpumask_set_cpu(cpu, mm_cpumask(next)); next_tlb_gen = atomic64_read(&next->context.tlb_gen); choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); load_new_mm_cr3(need_flush); // ^ After 'need_flush' is set to false, IPIs *MUST* // be sent to this CPU and not be ignored. this_cpu_write(cpu_tlbstate.loaded_mm, next); // ^ Not until this point does should_flush_tlb() // become true! should_flush_tlb() will suppress TLB flushes between load_new_mm_cr3() and writing to 'loaded_mm', which is a window where they should not be suppressed. Whoops. === Solution === Thankfully, the fuzzy "just about to write CR3" window is already marked with loaded_mm==LOADED_MM_SWITCHING. Simply checking for that state in should_flush_tlb() is sufficient to ensure that the CPU is targeted with an IPI. This will cause more TLB flush IPIs. But the window is relatively small and I do not expect this to cause any kind of measurable performance impact. Update the comment where LOADED_MM_SWITCHING is written since it grew yet another user. Peter Z also raised a concern that should_flush_tlb() might not observe 'loaded_mm' and 'is_lazy' in the same order that switch_mm_irqs_off() writes them. Add a barrier to ensure that they are observed in the order they are written. Signed-off-by: Dave Hansen Acked-by: Rik van Riel Link: https://lore.kernel.org/oe-lkp/202411282207.6bd28eae-lkp@intel.com/ [1] Fixes: 6db2526c1d69 ("x86/mm/tlb: Only trim the mm_cpumask once a second") [2] Reported-by: Stephen Dolan Cc: stable@vger.kernel.org Acked-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds --- arch/x86/mm/tlb.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index eb83348f9305..b6d6750e4bd1 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -899,8 +899,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, cond_mitigation(tsk); /* - * Let nmi_uaccess_okay() and finish_asid_transition() - * know that CR3 is changing. + * Indicate that CR3 is about to change. nmi_uaccess_okay() + * and others are sensitive to the window where mm_cpumask(), + * CR3 and cpu_tlbstate.loaded_mm are not all in sync. */ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); barrier(); @@ -1204,8 +1205,16 @@ done: static bool should_flush_tlb(int cpu, void *data) { + struct mm_struct *loaded_mm = per_cpu(cpu_tlbstate.loaded_mm, cpu); struct flush_tlb_info *info = data; + /* + * Order the 'loaded_mm' and 'is_lazy' against their + * write ordering in switch_mm_irqs_off(). Ensure + * 'is_lazy' is at least as new as 'loaded_mm'. + */ + smp_rmb(); + /* Lazy TLB will get flushed at the next context switch. */ if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) return false; @@ -1214,8 +1223,15 @@ static bool should_flush_tlb(int cpu, void *data) if (!info->mm) return true; + /* + * While switching, the remote CPU could have state from + * either the prev or next mm. Assume the worst and flush. + */ + if (loaded_mm == LOADED_MM_SWITCHING) + return true; + /* The target mm is loaded, and the CPU is not lazy. */ - if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm) + if (loaded_mm == info->mm) return true; /* In cpumask, but not the loaded mm? Periodically remove by flushing. */ From 250cf3693060a5f803c5f1ddc082bb06b16112a9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 27 Apr 2025 15:41:51 -0400 Subject: [PATCH 550/559] __legitimize_mnt(): check for MNT_SYNC_UMOUNT should be under mount_lock ... or we risk stealing final mntput from sync umount - raising mnt_count after umount(2) has verified that victim is not busy, but before it has set MNT_SYNC_UMOUNT; in that case __legitimize_mnt() doesn't see that it's safe to quietly undo mnt_count increment and leaves dropping the reference to caller, where it'll be a full-blown mntput(). Check under mount_lock is needed; leaving the current one done before taking that makes no sense - it's nowhere near common enough to bother with. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/namespace.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 98a5cd756e9a..eba4748388b1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -790,12 +790,8 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) smp_mb(); // see mntput_no_expire() if (likely(!read_seqretry(&mount_lock, seq))) return 0; - if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { - mnt_add_count(mnt, -1); - return 1; - } lock_mount_hash(); - if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { + if (unlikely(bastard->mnt_flags & (MNT_SYNC_UMOUNT | MNT_DOOMED))) { mnt_add_count(mnt, -1); unlock_mount_hash(); return 1; From 65781e19dcfcb4aed1167d87a3ffcc2a0c071d47 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 28 Apr 2025 23:56:14 -0400 Subject: [PATCH 551/559] do_umount(): add missing barrier before refcount checks in sync case do_umount() analogue of the race fixed in 119e1ef80ecf "fix __legitimize_mnt()/mntput() race". Here we want to make sure that if __legitimize_mnt() doesn't notice our lock_mount_hash(), we will notice their refcount increment. Harder to hit than mntput_no_expire() one, fortunately, and consequences are milder (sync umount acting like umount -l on a rare race with RCU pathwalk hitting at just the wrong time instead of use-after-free galore mntput_no_expire() counterpart used to be hit). Still a bug... Fixes: 48a066e72d97 ("RCU'd vfsmounts") Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index eba4748388b1..d8a344d0a80a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -787,7 +787,7 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) return 0; mnt = real_mount(bastard); mnt_add_count(mnt, 1); - smp_mb(); // see mntput_no_expire() + smp_mb(); // see mntput_no_expire() and do_umount() if (likely(!read_seqretry(&mount_lock, seq))) return 0; lock_mount_hash(); @@ -2044,6 +2044,7 @@ static int do_umount(struct mount *mnt, int flags) umount_tree(mnt, UMOUNT_PROPAGATE); retval = 0; } else { + smp_mb(); // paired with __legitimize_mnt() shrink_submounts(mnt); retval = -EBUSY; if (!propagate_mount_busy(mnt, 2)) { From 267fc3a06a37bec30cc5b4d97fb8409102bc7a9d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 28 Apr 2025 21:43:23 -0400 Subject: [PATCH 552/559] do_move_mount(): don't leak MNTNS_PROPAGATING on failures as it is, a failed move_mount(2) from anon namespace breaks all further propagation into that namespace, including normal mounts in non-anon namespaces that would otherwise propagate there. Fixes: 064fe6e233e8 ("mount: handle mount propagation for detached mount trees") Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/namespace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index d8a344d0a80a..04a9bb9f31fa 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3715,15 +3715,14 @@ static int do_move_mount(struct path *old_path, if (err) goto out; - if (is_anon_ns(ns)) - ns->mntns_flags &= ~MNTNS_PROPAGATING; - /* if the mount is moved, it should no longer be expire * automatically */ list_del_init(&old->mnt_expire); if (attached) put_mountpoint(old_mp); out: + if (is_anon_ns(ns)) + ns->mntns_flags &= ~MNTNS_PROPAGATING; unlock_mount(mp); if (!err) { if (attached) { From d1ddc6f1d9f0cf887834eb54a5a68bbfeec1bb77 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2025 15:35:51 -0400 Subject: [PATCH 553/559] fix IS_MNT_PROPAGATING uses propagate_mnt() does not attach anything to mounts created during propagate_mnt() itself. What's more, anything on ->mnt_slave_list of such new mount must also be new, so we don't need to even look there. When move_mount() had been introduced, we've got an additional class of mounts to skip - if we are moving from anon namespace, we do not want to propagate to mounts we are moving (i.e. all mounts in that anon namespace). Unfortunately, the part about "everything on their ->mnt_slave_list will also be ignorable" is not true - if we have propagation graph A -> B -> C and do OPEN_TREE_CLONE open_tree() of B, we get A -> [B <-> B'] -> C as propagation graph, where B' is a clone of B in our detached tree. Making B private will result in A -> B' -> C C still gets propagation from A, as it would after making B private if we hadn't done that open_tree(), but now the propagation goes through B'. Trying to move_mount() our detached tree on subdirectory in A should have * moved B' on that subdirectory in A * skipped the corresponding subdirectory in B' itself * copied B' on the corresponding subdirectory in C. As it is, the logics in propagation_next() and friends ends up skipping propagation into C, since it doesn't consider anything downstream of B'. IOW, walking the propagation graph should only skip the ->mnt_slave_list of new mounts; the only places where the check for "in that one anon namespace" are applicable are propagate_one() (where we should treat that as the same kind of thing as "mountpoint we are looking at is not visible in the mount we are looking at") and propagation_would_overmount(). The latter is better dealt with in the caller (can_move_mount_beneath()); on the first call of propagation_would_overmount() the test is always false, on the second it is always true in "move from anon namespace" case and always false in "move within our namespace" one, so it's easier to just use check_mnt() before bothering with the second call and be done with that. Fixes: 064fe6e233e8 ("mount: handle mount propagation for detached mount trees") Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/namespace.c | 3 ++- fs/pnode.c | 17 +++++++++-------- fs/pnode.h | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 04a9bb9f31fa..1b466c54a357 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3557,7 +3557,8 @@ static int can_move_mount_beneath(const struct path *from, * @mnt_from itself. This defeats the whole purpose of mounting * @mnt_from beneath @mnt_to. */ - if (propagation_would_overmount(parent_mnt_to, mnt_from, mp)) + if (check_mnt(mnt_from) && + propagation_would_overmount(parent_mnt_to, mnt_from, mp)) return -EINVAL; return 0; diff --git a/fs/pnode.c b/fs/pnode.c index 7a062a5de10e..fb77427df39e 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -150,7 +150,7 @@ static struct mount *propagation_next(struct mount *m, struct mount *origin) { /* are there any slaves of this mount? */ - if (!IS_MNT_PROPAGATED(m) && !list_empty(&m->mnt_slave_list)) + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) return first_slave(m); while (1) { @@ -174,7 +174,7 @@ static struct mount *skip_propagation_subtree(struct mount *m, * Advance m such that propagation_next will not return * the slaves of m. */ - if (!IS_MNT_PROPAGATED(m) && !list_empty(&m->mnt_slave_list)) + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) m = last_slave(m); return m; @@ -185,7 +185,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin) while (1) { while (1) { struct mount *next; - if (!IS_MNT_PROPAGATED(m) && !list_empty(&m->mnt_slave_list)) + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) return first_slave(m); next = next_peer(m); if (m->mnt_group_id == origin->mnt_group_id) { @@ -226,11 +226,15 @@ static int propagate_one(struct mount *m, struct mountpoint *dest_mp) struct mount *child; int type; /* skip ones added by this propagate_mnt() */ - if (IS_MNT_PROPAGATED(m)) + if (IS_MNT_NEW(m)) return 0; - /* skip if mountpoint isn't covered by it */ + /* skip if mountpoint isn't visible in m */ if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) return 0; + /* skip if m is in the anon_ns we are emptying */ + if (m->mnt_ns->mntns_flags & MNTNS_PROPAGATING) + return 0; + if (peers(m, last_dest)) { type = CL_MAKE_SHARED; } else { @@ -380,9 +384,6 @@ bool propagation_would_overmount(const struct mount *from, if (!IS_MNT_SHARED(from)) return false; - if (IS_MNT_PROPAGATED(to)) - return false; - if (to->mnt.mnt_root != mp->m_dentry) return false; diff --git a/fs/pnode.h b/fs/pnode.h index ddafe0d087ca..34b6247af01d 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -12,7 +12,7 @@ #define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED) #define IS_MNT_SLAVE(m) ((m)->mnt_master) -#define IS_MNT_PROPAGATED(m) (!(m)->mnt_ns || ((m)->mnt_ns->mntns_flags & MNTNS_PROPAGATING)) +#define IS_MNT_NEW(m) (!(m)->mnt_ns) #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED) #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE) #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) From e98960bc4df931a2f4fa88f7e55370699ca4dd82 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 6 Mar 2025 12:50:21 +0100 Subject: [PATCH 554/559] Input: hisi_powerkey - enable system-wakeup for s2idle To wake up the system from s2idle when pressing the power-button, let's convert from using pm_wakeup_event() to pm_wakeup_dev_event(), as it allows us to specify the "hard" in-parameter, which needs to be set for s2idle. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20250306115021.797426-1-ulf.hansson@linaro.org Signed-off-by: Dmitry Torokhov --- drivers/input/misc/hisi_powerkey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/hisi_powerkey.c b/drivers/input/misc/hisi_powerkey.c index d3c293a95d32..d315017324d9 100644 --- a/drivers/input/misc/hisi_powerkey.c +++ b/drivers/input/misc/hisi_powerkey.c @@ -30,7 +30,7 @@ static irqreturn_t hi65xx_power_press_isr(int irq, void *q) { struct input_dev *input = q; - pm_wakeup_event(input->dev.parent, MAX_HELD_TIME); + pm_wakeup_dev_event(input->dev.parent, MAX_HELD_TIME, true); input_report_key(input, KEY_POWER, 1); input_sync(input); From d05a424bea9aa3435009d5c462055008cc1545d8 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Fri, 28 Mar 2025 16:43:36 -0700 Subject: [PATCH 555/559] Input: xpad - fix two controller table values Two controllers -- Mad Catz JOYTECH NEO SE Advanced and PDP Mirror's Edge Official -- were missing the value of the mapping field, and thus wouldn't detect properly. Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250328234345.989761-1-vi@endrift.com Fixes: 540602a43ae5 ("Input: xpad - add a few new VID/PID combinations") Fixes: 3492321e2e60 ("Input: xpad - add multiple supported devices") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index c933e47173bd..2c935cf79ddc 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -205,7 +205,7 @@ static const struct xpad_device { { 0x0738, 0x9871, "Mad Catz Portable Drum", 0, XTYPE_XBOX360 }, { 0x0738, 0xb726, "Mad Catz Xbox controller - MW2", 0, XTYPE_XBOX360 }, { 0x0738, 0xb738, "Mad Catz MVC2TE Stick 2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, - { 0x0738, 0xbeef, "Mad Catz JOYTECH NEO SE Advanced GamePad", XTYPE_XBOX360 }, + { 0x0738, 0xbeef, "Mad Catz JOYTECH NEO SE Advanced GamePad", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb02, "Saitek Cyborg Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb03, "Saitek P3200 Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb29, "Saitek Aviator Stick AV8R02", 0, XTYPE_XBOX360 }, @@ -240,7 +240,7 @@ static const struct xpad_device { { 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0147, "PDP Marvel Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x015c, "PDP Xbox One Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, - { 0x0e6f, 0x015d, "PDP Mirror's Edge Official Wired Controller for Xbox One", XTYPE_XBOXONE }, + { 0x0e6f, 0x015d, "PDP Mirror's Edge Official Wired Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0161, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0162, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0163, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, From 4ef46367073b107ec22f46fe5f12176e87c238e8 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Sat, 10 May 2025 22:59:25 -0700 Subject: [PATCH 556/559] Input: xpad - fix Share button on Xbox One controllers The Share button, if present, is always one of two offsets from the end of the file, depending on the presence of a specific interface. As we lack parsing for the identify packet we can't automatically determine the presence of that interface, but we can hardcode which of these offsets is correct for a given controller. More controllers are probably fixable by adding the MAP_SHARE_BUTTON in the future, but for now I only added the ones that I have the ability to test directly. Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250328234345.989761-2-vi@endrift.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 2c935cf79ddc..8ee7d8e5d1c7 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -77,12 +77,13 @@ * xbox d-pads should map to buttons, as is required for DDR pads * but we map them to axes when possible to simplify things */ -#define MAP_DPAD_TO_BUTTONS (1 << 0) -#define MAP_TRIGGERS_TO_BUTTONS (1 << 1) -#define MAP_STICKS_TO_NULL (1 << 2) -#define MAP_SELECT_BUTTON (1 << 3) -#define MAP_PADDLES (1 << 4) -#define MAP_PROFILE_BUTTON (1 << 5) +#define MAP_DPAD_TO_BUTTONS BIT(0) +#define MAP_TRIGGERS_TO_BUTTONS BIT(1) +#define MAP_STICKS_TO_NULL BIT(2) +#define MAP_SHARE_BUTTON BIT(3) +#define MAP_PADDLES BIT(4) +#define MAP_PROFILE_BUTTON BIT(5) +#define MAP_SHARE_OFFSET BIT(6) #define DANCEPAD_MAP_CONFIG (MAP_DPAD_TO_BUTTONS | \ MAP_TRIGGERS_TO_BUTTONS | MAP_STICKS_TO_NULL) @@ -135,7 +136,7 @@ static const struct xpad_device { { 0x03f0, 0x048D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wireless */ { 0x03f0, 0x0495, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, { 0x03f0, 0x07A0, "HyperX Clutch Gladiate RGB", 0, XTYPE_XBOXONE }, - { 0x03f0, 0x08B6, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, /* v2 */ + { 0x03f0, 0x08B6, "HyperX Clutch Gladiate", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, /* v2 */ { 0x03f0, 0x09B4, "HyperX Clutch Tanto", 0, XTYPE_XBOXONE }, { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, @@ -159,7 +160,7 @@ static const struct xpad_device { { 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x0b00, "Microsoft X-Box One Elite 2 pad", MAP_PADDLES, XTYPE_XBOXONE }, { 0x045e, 0x0b0a, "Microsoft X-Box Adaptive Controller", MAP_PROFILE_BUTTON, XTYPE_XBOXONE }, - { 0x045e, 0x0b12, "Microsoft Xbox Series S|X Controller", MAP_SELECT_BUTTON, XTYPE_XBOXONE }, + { 0x045e, 0x0b12, "Microsoft Xbox Series S|X Controller", MAP_SHARE_BUTTON | MAP_SHARE_OFFSET, XTYPE_XBOXONE }, { 0x046d, 0xc21d, "Logitech Gamepad F310", 0, XTYPE_XBOX360 }, { 0x046d, 0xc21e, "Logitech Gamepad F510", 0, XTYPE_XBOX360 }, { 0x046d, 0xc21f, "Logitech Gamepad F710", 0, XTYPE_XBOX360 }, @@ -211,7 +212,7 @@ static const struct xpad_device { { 0x0738, 0xcb29, "Saitek Aviator Stick AV8R02", 0, XTYPE_XBOX360 }, { 0x0738, 0xf738, "Super SFIV FightStick TE S", 0, XTYPE_XBOX360 }, { 0x07ff, 0xffff, "Mad Catz GamePad", 0, XTYPE_XBOX360 }, - { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", 0, XTYPE_XBOXONE }, + { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x0b05, 0x1abb, "ASUS ROG RAIKIRI PRO", 0, XTYPE_XBOXONE }, { 0x0c12, 0x0005, "Intec wireless", 0, XTYPE_XBOX }, { 0x0c12, 0x8801, "Nyko Xbox Controller", 0, XTYPE_XBOX }, @@ -391,7 +392,7 @@ static const struct xpad_device { { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x2e24, 0x1688, "Hyperkin X91 X-Box One pad", 0, XTYPE_XBOXONE }, - { 0x2e95, 0x0504, "SCUF Gaming Controller", MAP_SELECT_BUTTON, XTYPE_XBOXONE }, + { 0x2e95, 0x0504, "SCUF Gaming Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 }, @@ -1028,7 +1029,7 @@ static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned cha * The report format was gleaned from * https://github.com/kylelemons/xbox/blob/master/xbox.go */ -static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) +static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data, u32 len) { struct input_dev *dev = xpad->dev; bool do_sync = false; @@ -1069,8 +1070,12 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char /* menu/view buttons */ input_report_key(dev, BTN_START, data[4] & BIT(2)); input_report_key(dev, BTN_SELECT, data[4] & BIT(3)); - if (xpad->mapping & MAP_SELECT_BUTTON) - input_report_key(dev, KEY_RECORD, data[22] & BIT(0)); + if (xpad->mapping & MAP_SHARE_BUTTON) { + if (xpad->mapping & MAP_SHARE_OFFSET) + input_report_key(dev, KEY_RECORD, data[len - 26] & BIT(0)); + else + input_report_key(dev, KEY_RECORD, data[len - 18] & BIT(0)); + } /* buttons A,B,X,Y */ input_report_key(dev, BTN_A, data[4] & BIT(4)); @@ -1218,7 +1223,7 @@ static void xpad_irq_in(struct urb *urb) xpad360w_process_packet(xpad, 0, xpad->idata); break; case XTYPE_XBOXONE: - xpadone_process_packet(xpad, 0, xpad->idata); + xpadone_process_packet(xpad, 0, xpad->idata, urb->actual_length); break; default: xpad_process_packet(xpad, 0, xpad->idata); @@ -1945,7 +1950,7 @@ static int xpad_init_input(struct usb_xpad *xpad) xpad->xtype == XTYPE_XBOXONE) { for (i = 0; xpad360_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad360_btn[i]); - if (xpad->mapping & MAP_SELECT_BUTTON) + if (xpad->mapping & MAP_SHARE_BUTTON) input_set_capability(input_dev, EV_KEY, KEY_RECORD); } else { for (i = 0; xpad_btn[i] >= 0; i++) From 7026d23cb383120712f3a214b0b33ca349cd21a0 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Sat, 10 May 2025 23:00:10 -0700 Subject: [PATCH 557/559] Input: xpad - add support for several more controllers This adds support for several new controllers, all of which include Share buttons: - HORI Drum controller - PowerA Fusion Pro 4 - 8BitDo Ultimate 3-mode Controller - Hyperkin DuchesS Xbox One controller - PowerA MOGA XP-Ultra controller Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250328234345.989761-4-vi@endrift.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 8ee7d8e5d1c7..418553cadc03 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -282,6 +282,7 @@ static const struct xpad_device { { 0x0f0d, 0x00dc, "HORIPAD FPS for Nintendo Switch", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x0151, "Hori Racing Wheel Overdrive for Xbox Series X", 0, XTYPE_XBOXONE }, { 0x0f0d, 0x0152, "Hori Racing Wheel Overdrive for Xbox Series X", 0, XTYPE_XBOXONE }, + { 0x0f0d, 0x01b2, "HORI Taiko No Tatsujin Drum Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x0f30, 0x010b, "Philips Recoil", 0, XTYPE_XBOX }, { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX }, { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX }, @@ -354,6 +355,8 @@ static const struct xpad_device { { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE }, { 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE }, { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 }, + { 0x20d6, 0x400b, "PowerA FUSION Pro 4 Wired Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, + { 0x20d6, 0x890b, "PowerA MOGA XP-Ultra Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x2345, 0xe00b, "Machenike G5 Pro Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5000, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 }, @@ -385,11 +388,13 @@ static const struct xpad_device { { 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE }, { 0x2993, 0x2001, "TECNO Pocket Go", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE }, + { 0x2dc8, 0x200f, "8BitDo Ultimate 3-mode Controller for Xbox", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x2dc8, 0x3106, "8BitDo Ultimate Wireless / Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x3109, "8BitDo Ultimate Wireless Bluetooth", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310a, "8BitDo Ultimate 2C Wireless Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310b, "8BitDo Ultimate 2 Wireless Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, + { 0x2e24, 0x0423, "Hyperkin DuchesS Xbox One pad", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x2e24, 0x1688, "Hyperkin X91 X-Box One pad", 0, XTYPE_XBOXONE }, { 0x2e95, 0x0504, "SCUF Gaming Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, @@ -716,8 +721,10 @@ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x045e, 0x0b00, xboxone_s_init), XBOXONE_INIT_PKT(0x045e, 0x0b00, extra_input_packet_init), XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_led_on), + XBOXONE_INIT_PKT(0x0f0d, 0x01b2, xboxone_pdp_led_on), XBOXONE_INIT_PKT(0x20d6, 0xa01a, xboxone_pdp_led_on), XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_auth), + XBOXONE_INIT_PKT(0x0f0d, 0x01b2, xboxone_pdp_auth), XBOXONE_INIT_PKT(0x20d6, 0xa01a, xboxone_pdp_auth), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), From 93406e9d024058b3bf487656ddd0ac552e5a366e Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Sat, 10 May 2025 23:06:34 -0700 Subject: [PATCH 558/559] Input: xpad - fix xpad_device sorting A recent commit put one entry in the wrong place. This just moves it to the right place. Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250328234345.989761-5-vi@endrift.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 418553cadc03..57a5ff3d1992 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -141,9 +141,9 @@ static const struct xpad_device { { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX }, - { 0x044f, 0xd01e, "ThrustMaster, Inc. ESWAP X 2 ELDEN RING EDITION", 0, XTYPE_XBOXONE }, { 0x044f, 0x0f10, "Thrustmaster Modena GT Wheel", 0, XTYPE_XBOX }, { 0x044f, 0xb326, "Thrustmaster Gamepad GP XID", 0, XTYPE_XBOX360 }, + { 0x044f, 0xd01e, "ThrustMaster, Inc. ESWAP X 2 ELDEN RING EDITION", 0, XTYPE_XBOXONE }, { 0x045e, 0x0202, "Microsoft X-Box pad v1 (US)", 0, XTYPE_XBOX }, { 0x045e, 0x0285, "Microsoft X-Box pad (Japan)", 0, XTYPE_XBOX }, { 0x045e, 0x0287, "Microsoft Xbox Controller S", 0, XTYPE_XBOX }, From 82f2b0b97b36ee3fcddf0f0780a9a0825d52fec3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 May 2025 14:54:11 -0700 Subject: [PATCH 559/559] Linux 6.15-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b29cc321ffd9..64c514f4bc19 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION*