mm/hugetlb_vmemmap: fix incorrect vmemmap restore in rollback

vmemmap_restore_pte() rebuilds restored vmemmap pages from a tail-page
template derived from compound_head().  This is wrong when the current PTE
already maps a page whose contents are not tail-page metadata.

In the rollback path of vmemmap_remap_free(), the first restored PTE is
backed by vmemmap_head and contains head-page metadata.  Reconstructing
that page from a tail-page template overwrites the head-page state and
corrupts the restored vmemmap page.

Fix this by copying the full page from the page currently mapped by the
PTE.  Also pass vmemmap_tail to the rollback walk so only PTEs backed by
the shared tail page are restored, while the head PTE remains mapped to
vmemmap_head.  Add VM_WARN_ON_ONCE() checks for unexpected cases.

Link: https://lore.kernel.org/20260525025213.2229628-1-songmuchun@bytedance.com
Fixes: c0b495b91a ("mm/hugetlb: refactor code around vmemmap_walk")
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Kiryl Shutsemau <kas@kernel.org>
Acked-by: Oscar Salvador (SUSE) <osalvador@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
master
Muchun Song 2026-05-25 10:52:13 +08:00 committed by Andrew Morton
parent d6b8b02a27
commit c7bde43f6d
1 changed file with 18 additions and 18 deletions

View File

@@ -207,6 +207,8 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
/* Remapping the head page requires r/w */
if (unlikely(walk->nr_walked == 0 && walk->vmemmap_head)) {
VM_WARN_ON_ONCE(!PageHead((const struct page *)addr));
list_del(&walk->vmemmap_head->lru);
/*
@@ -218,6 +220,8 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
entry = mk_pte(walk->vmemmap_head, PAGE_KERNEL);
} else {
VM_WARN_ON_ONCE(!PageTail((const struct page *)addr));
/*
* Remap the tail pages as read-only to catch illegal write
* operation to the tail pages.
@@ -232,33 +236,28 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
/*
 * vmemmap_restore_pte - remap_pte callback that repoints @pte at a page
 * taken from walk->vmemmap_pages.
 * @pte:  page table entry to rewrite
 * @addr: virtual address covered by @pte; it is also cast to a struct page
 *        pointer for the compound_head() and PageHead() uses below
 * @walk: walk state supplying the vmemmap_pages list and vmemmap_tail
 *
 * NOTE(review): this listing looks like a rendered diff whose +/- markers
 * were dropped, so statements of the pre-patch body (page/from/to) and of
 * the post-patch body (src/dst) appear interleaved. Confirm against the
 * tree before relying on the statement order shown here.
 */
static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
struct vmemmap_remap_walk *walk)
{
struct page *page;
struct page *from, *to;
/* Take the first page off the walk->vmemmap_pages list. */
page = list_first_entry(walk->vmemmap_pages, struct page, lru);
list_del(&page->lru);
/* src is the page that @pte maps at the time of the call. */
struct page *src = pte_page(ptep_get(pte)), *dst;
/*
* Initialize tail pages in the newly allocated vmemmap page.
*
* There is folio-scope metadata that is encoded in the first few
* tail pages.
*
* Use the value of the last tail page in the page with the head page
* to initialize the rest of the tail pages.
* When rolling back vmemmap_remap_free(), keep the copied head page
* mapping and restore only PTEs currently pointing at the shared tail
* page.
*/
/*
* from is the struct page at index PAGE_SIZE / sizeof(struct page) - 1
* counted from the compound head of @addr; the loop stores a copy of it
* into every struct page slot of the page taken above.
*/
from = compound_head((struct page *)addr) +
PAGE_SIZE / sizeof(struct page) - 1;
to = page_to_virt(page);
for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++, to++)
*to = *from;
/*
* If the walk names a tail page, leave every PTE that maps some other
* page untouched.
*/
if (walk->vmemmap_tail && walk->vmemmap_tail != src)
return;
/* Warn once if @addr, viewed as a struct page, is a head page here. */
VM_WARN_ON_ONCE(PageHead((const struct page *)addr));
/* Take a replacement page and fill it with a full copy of src. */
dst = list_first_entry(walk->vmemmap_pages, struct page, lru);
list_del(&dst->lru);
copy_page(page_to_virt(dst), page_to_virt(src));
/*
* Make sure that preceding stores to the page contents become visible
* before the set_pte_at() write.
*/
smp_wmb();
set_pte_at(&init_mm, addr, pte, mk_pte(page, PAGE_KERNEL));
set_pte_at(&init_mm, addr, pte, mk_pte(dst, PAGE_KERNEL));
}
/**
@@ -324,6 +323,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
*/
walk = (struct vmemmap_remap_walk) {
.remap_pte = vmemmap_restore_pte,
.vmemmap_tail = vmemmap_tail,
.vmemmap_pages = vmemmap_pages,
.flags = 0,
};