KVM: s390: Fix to clear PTE when discarding a swapped page

KVM run fails when guests with 'cmm' cpu feature and host are
under memory pressure and use swap heavily. This is because
npages becomes ENOMEM (out of memory) in hva_to_pfn_slow()
which in turn propagates as EFAULT to qemu. Clearing the page
table entry when discarding an address that maps to a swap
entry resolves the issue.

Fixes: 200197908d ("KVM: s390: Refactor and split some gmap helpers")
Cc: stable@vger.kernel.org
Suggested-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: Gautam Gala <ggala@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
pull/1354/merge
Gautam Gala 2025-09-24 13:26:44 +02:00 committed by Claudio Imbrenda
parent 352ccf890a
commit 5deafa27d9
3 changed files with 34 additions and 23 deletions

View File

@ -2010,4 +2010,26 @@ static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt)
return res; return res;
} }
/*
 * pgste_get_lock() - acquire the per-PTE PGSTE lock and return the PGSTE.
 *
 * The PGSTE word for @ptep is located PTRS_PER_PTE entries past the pte
 * itself (second half of the page table page).  Atomically set the
 * PGSTE_PCL_BIT lock bit with full-barrier semantics and spin until the
 * previous value shows the bit was clear, i.e. until we are the owner.
 * The returned pgste has PGSTE_PCL_BIT set; pass it back unmodified
 * (apart from intended PGSTE updates) to pgste_set_unlock().
 * Without CONFIG_PGSTE this compiles to a no-op returning __pgste(0).
 * NOTE(review): callers appear to disable preemption around the
 * lock/unlock pair — confirm that this is a hard requirement.
 */
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
unsigned long value = 0;
#ifdef CONFIG_PGSTE
/* PGSTE slot: one PTRS_PER_PTE stride past the pte entry */
unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);
do {
/* returns the old value; loop while someone else held the lock */
value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
} while (value & PGSTE_PCL_BIT);
/* reflect the now-held lock bit in the value handed to the caller */
value |= PGSTE_PCL_BIT;
#endif
return __pgste(value);
}
/*
 * pgste_set_unlock() - store an updated PGSTE and release the lock.
 *
 * barrier() keeps the critical section's accesses from being reordered
 * past the release; WRITE_ONCE() then publishes @pgste with
 * PGSTE_PCL_BIT cleared in a single store, which both writes the new
 * PGSTE contents and drops the lock taken by pgste_get_lock().
 * No-op without CONFIG_PGSTE.
 */
static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
barrier();
WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
#endif
}
#endif /* _S390_PAGE_H */ #endif /* _S390_PAGE_H */

View File

@ -13,6 +13,7 @@
#include <linux/pagewalk.h> #include <linux/pagewalk.h>
#include <linux/ksm.h> #include <linux/ksm.h>
#include <asm/gmap_helpers.h> #include <asm/gmap_helpers.h>
#include <asm/pgtable.h>
/** /**
* ptep_zap_swap_entry() - discard a swap entry. * ptep_zap_swap_entry() - discard a swap entry.
@ -45,6 +46,7 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{ {
struct vm_area_struct *vma; struct vm_area_struct *vma;
spinlock_t *ptl; spinlock_t *ptl;
pgste_t pgste;
pte_t *ptep; pte_t *ptep;
mmap_assert_locked(mm); mmap_assert_locked(mm);
@ -58,8 +60,16 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
ptep = get_locked_pte(mm, vmaddr, &ptl); ptep = get_locked_pte(mm, vmaddr, &ptl);
if (unlikely(!ptep)) if (unlikely(!ptep))
return; return;
if (pte_swap(*ptep)) if (pte_swap(*ptep)) {
preempt_disable();
pgste = pgste_get_lock(ptep);
ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep)); ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
pte_clear(mm, vmaddr, ptep);
pgste_set_unlock(ptep, pgste);
preempt_enable();
}
pte_unmap_unlock(ptep, ptl); pte_unmap_unlock(ptep, ptl);
} }
EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page); EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);

View File

@ -23,6 +23,7 @@
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/page-states.h> #include <asm/page-states.h>
#include <asm/pgtable.h>
#include <asm/machine.h> #include <asm/machine.h>
pgprot_t pgprot_writecombine(pgprot_t prot) pgprot_t pgprot_writecombine(pgprot_t prot)
@ -114,28 +115,6 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
return old; return old;
} }
/*
 * pgste_get_lock() - acquire the per-PTE PGSTE lock and return the PGSTE.
 *
 * The PGSTE word for @ptep is located PTRS_PER_PTE entries past the pte
 * itself.  Atomically set PGSTE_PCL_BIT with barrier semantics and spin
 * until the previous value shows the bit was clear, i.e. until we own
 * the lock.  The returned pgste has PGSTE_PCL_BIT set; it must be handed
 * back to pgste_set_unlock() to release.  Without CONFIG_PGSTE this is a
 * no-op returning __pgste(0).
 */
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
unsigned long value = 0;
#ifdef CONFIG_PGSTE
/* PGSTE slot: one PTRS_PER_PTE stride past the pte entry */
unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);
do {
/* old value returned; spin while another owner held the bit */
value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
} while (value & PGSTE_PCL_BIT);
/* reflect the now-held lock bit in the returned value */
value |= PGSTE_PCL_BIT;
#endif
return __pgste(value);
}
/*
 * pgste_set_unlock() - store an updated PGSTE and release the lock.
 *
 * barrier() prevents the compiler from sinking critical-section accesses
 * past the release; WRITE_ONCE() then stores @pgste with PGSTE_PCL_BIT
 * cleared, updating the PGSTE and unlocking it in one store.
 * No-op without CONFIG_PGSTE.
 */
static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
barrier();
WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
#endif
}
static inline pgste_t pgste_get(pte_t *ptep) static inline pgste_t pgste_get(pte_t *ptep)
{ {
unsigned long pgste = 0; unsigned long pgste = 0;