mm: khugepaged: skip lazy-free folios
For example, create three tasks: hot1 -> cold -> hot2. After all three tasks are created, each allocates 128 MB of memory. The hot1/hot2 tasks continuously access their 128 MB of memory, while the cold task only accesses its memory briefly and then calls madvise(MADV_FREE). However, khugepaged still prioritizes scanning the cold task and only scans the hot2 task after completing the scan of the cold task.

All folios in a VM_DROPPABLE vma are lazyfree. Collapsing maintains that property, so we can just collapse, and memory pressure in the future will free it up. In contrast, collapsing in a !VM_DROPPABLE vma does not maintain that property: the collapsed folio will not be lazyfree, and memory pressure in the future will not be able to free it up.

So if the user has explicitly informed us via MADV_FREE that this memory will be freed, and this vma does not have the VM_DROPPABLE flag, it is appropriate for khugepaged to simply skip it, thereby avoiding unnecessary scan and collapse operations and reducing wasted CPU time.

Here are the performance test results (for Throughput, bigger is better; for the others, smaller is better):

Testing on x86_64 machine:

| task hot2           | without patch | with patch    | delta   |
|---------------------|---------------|---------------|---------|
| total accesses time | 3.14 sec      | 2.93 sec      | -6.69%  |
| cycles per access   | 4.96          | 2.21          | -55.44% |
| Throughput          | 104.38 M/sec  | 111.89 M/sec  | +7.19%  |
| dTLB-load-misses    | 284814532     | 69597236      | -75.56% |

Testing on qemu-system-x86_64 -enable-kvm:

| task hot2           | without patch | with patch    | delta   |
|---------------------|---------------|---------------|---------|
| total accesses time | 3.35 sec      | 2.96 sec      | -11.64% |
| cycles per access   | 7.29          | 2.07          | -71.60% |
| Throughput          | 97.67 M/sec   | 110.77 M/sec  | +13.41% |
| dTLB-load-misses    | 241600871     | 3216108       | -98.67% |

[vernon2gm@gmail.com: add comment about VM_DROPPABLE in code, make it clearer]
Link: https://lkml.kernel.org/r/i4uowkt4h2ev47obm5h2vtd4zbk6fyw5g364up7kkjn2vmcikq@auepvqethj5r
Link: 
https://lkml.kernel.org/r/20260221093918.1456187-5-vernon2gm@gmail.com
Signed-off-by: Vernon Yang <yanglincheng@kylinos.cn>
Acked-by: David Hildenbrand (arm) <david@kernel.org>
Reviewed-by: Lance Yang <lance.yang@linux.dev>
Reviewed-by: Barry Song <baohua@kernel.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
master
parent
6cc153f90b
commit
0562041977
|
|
@ -25,6 +25,7 @@
|
|||
EM( SCAN_PAGE_LRU, "page_not_in_lru") \
|
||||
EM( SCAN_PAGE_LOCK, "page_locked") \
|
||||
EM( SCAN_PAGE_ANON, "page_not_anon") \
|
||||
EM( SCAN_PAGE_LAZYFREE, "page_lazyfree") \
|
||||
EM( SCAN_PAGE_COMPOUND, "page_compound") \
|
||||
EM( SCAN_ANY_PROCESS, "no_process_for_page") \
|
||||
EM( SCAN_VMA_NULL, "vma_null") \
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ enum scan_result {
|
|||
SCAN_PAGE_LRU,
|
||||
SCAN_PAGE_LOCK,
|
||||
SCAN_PAGE_ANON,
|
||||
SCAN_PAGE_LAZYFREE,
|
||||
SCAN_PAGE_COMPOUND,
|
||||
SCAN_ANY_PROCESS,
|
||||
SCAN_VMA_NULL,
|
||||
|
|
@ -577,6 +578,16 @@ static enum scan_result __collapse_huge_page_isolate(struct vm_area_struct *vma,
|
|||
folio = page_folio(page);
|
||||
VM_BUG_ON_FOLIO(!folio_test_anon(folio), folio);
|
||||
|
||||
/*
|
||||
* If the vma has the VM_DROPPABLE flag, the collapse will
|
||||
* preserve the lazyfree property without needing to skip.
|
||||
*/
|
||||
if (cc->is_khugepaged && !(vma->vm_flags & VM_DROPPABLE) &&
|
||||
folio_test_lazyfree(folio) && !pte_dirty(pteval)) {
|
||||
result = SCAN_PAGE_LAZYFREE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* See hpage_collapse_scan_pmd(). */
|
||||
if (folio_maybe_mapped_shared(folio)) {
|
||||
++shared;
|
||||
|
|
@ -1325,6 +1336,16 @@ static enum scan_result hpage_collapse_scan_pmd(struct mm_struct *mm,
|
|||
}
|
||||
folio = page_folio(page);
|
||||
|
||||
/*
|
||||
* If the vma has the VM_DROPPABLE flag, the collapse will
|
||||
* preserve the lazyfree property without needing to skip.
|
||||
*/
|
||||
if (cc->is_khugepaged && !(vma->vm_flags & VM_DROPPABLE) &&
|
||||
folio_test_lazyfree(folio) && !pte_dirty(pteval)) {
|
||||
result = SCAN_PAGE_LAZYFREE;
|
||||
goto out_unmap;
|
||||
}
|
||||
|
||||
if (!folio_test_anon(folio)) {
|
||||
result = SCAN_PAGE_ANON;
|
||||
goto out_unmap;
|
||||
|
|
|
|||
Loading…
Reference in New Issue