bpf_try_alloc_pages
Merge tag 'bpf_try_alloc_pages' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf try_alloc_pages() support from Alexei Starovoitov:
"The pull includes work from Sebastian, Vlastimil and myself, with a lot
of help from Michal and Shakeel.

This is a first step towards making kmalloc reentrant, so that the slab
wrappers (bpf_mem_alloc, kretprobe's objpool, etc.) can eventually be
removed. These patches make the page allocator safe to call from any
context.

Vlastimil kicked off this effort at LSFMM 2024:
https://lwn.net/Articles/974138/
and we continued at LSFMM 2025:
https://lore.kernel.org/all/CAADnVQKfkGxudNUkcPJgwe3nTZ=xohnRshx9kLZBTmR_E1DFEg@mail.gmail.com/

Why:

Slab wrappers bind memory to a particular subsystem, making it
unavailable to the rest of the kernel. Some BPF maps in production
consume gigabytes of preallocated memory; the top five at Meta hold
1.5G, 1.2G, 1.1G, 300M and 200M. Once we have a kmalloc that works in
any context, BPF map preallocation won't be necessary.

How:

The synchronous kmalloc/page-alloc stack has multiple stages, going
from fast to slow: cmpxchg16 -> slab_alloc -> new_slab -> alloc_pages
-> rmqueue_pcplist -> __rmqueue, where rmqueue_pcplist was already
relying on trylock.

This set changes rmqueue_bulk()/rmqueue_buddy() to attempt a trylock
and return -ENOMEM if alloc_flags & ALLOC_TRYLOCK is set, then wraps
this functionality into the try_alloc_pages() helper. We make sure the
logic is sane on PREEMPT_RT.

End result: try_alloc_pages()/free_pages_nolock() are safe to call
from any context.

A try_kmalloc() for any context, based on the same trylock approach,
will follow. It will use try_alloc_pages() when slab needs a new page.
Though such a try_kmalloc()/try_alloc_pages() combination is an
opportunistic allocator, the design ensures that the probability of
successfully allocating small objects (up to one page in size) is high.

Even before try_kmalloc() exists, try_alloc_pages() is already used by
the BPF arena implementation, and it's going to be used more
extensively in BPF"
* tag 'bpf_try_alloc_pages' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
mm: Fix the flipped condition in gfpflags_allow_spinning()
bpf: Use try_alloc_pages() to allocate pages for bpf needs.
mm, bpf: Use memcg in try_alloc_pages().
memcg: Use trylock to access memcg stock_lock.
mm, bpf: Introduce free_pages_nolock()
mm, bpf: Introduce try_alloc_pages() for opportunistic page allocation
locking/local_lock: Introduce localtry_lock_t
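Before the diff itself, a minimal usage sketch of the new API pair may help. The helper below is invented for illustration and is not part of the series, but the pairing of try_alloc_pages() with free_pages_nolock(), with NULL tolerated at any time, follows the semantics described in the message above.

#include <linux/gfp.h>
#include <linux/mm.h>

/*
 * Hypothetical helper running in an arbitrary context (could be NMI,
 * hard IRQ, or under a raw spinlock). Illustration only.
 */
static void *grab_scratch_page(int nid)
{
	/* Opportunistic: may return NULL at any time; never warns or sleeps. */
	struct page *page = try_alloc_pages(nid, 0);

	return page ? page_address(page) : NULL;
}

static void drop_scratch_page(void *addr)
{
	if (addr)
		/* Safe from any context, including NMI and under raw locks. */
		free_pages_nolock(virt_to_page(addr), 0);
}

The key design point is that both calls rely only on trylocks internally, which is why they may be issued from NMI, hard IRQ, or while holding raw spinlocks.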
include/linux/bpf.h

@@ -2385,7 +2385,7 @@ int generic_map_delete_batch(struct bpf_map *map,
 struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
 
-int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
+int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
                        unsigned long nr_pages, struct page **page_array);
 #ifdef CONFIG_MEMCG
 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
include/linux/gfp.h

@@ -39,6 +39,25 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
        return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
 }
 
+static inline bool gfpflags_allow_spinning(const gfp_t gfp_flags)
+{
+       /*
+        * !__GFP_DIRECT_RECLAIM -> direct claim is not allowed.
+        * !__GFP_KSWAPD_RECLAIM -> it's not safe to wake up kswapd.
+        * All GFP_* flags including GFP_NOWAIT use one or both flags.
+        * try_alloc_pages() is the only API that doesn't specify either flag.
+        *
+        * This is stronger than GFP_NOWAIT or GFP_ATOMIC because
+        * those are guaranteed to never block on a sleeping lock.
+        * Here we are enforcing that the allocation doesn't ever spin
+        * on any locks (i.e. only trylocks). There is no high level
+        * GFP_$FOO flag for this use in try_alloc_pages() as the
+        * regular page allocator doesn't fully support this
+        * allocation mode.
+        */
+       return !!(gfp_flags & __GFP_RECLAIM);
+}
+
 #ifdef CONFIG_HIGHMEM
 #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
 #else
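As an aside on how gfpflags_allow_spinning() is meant to be consumed: the sketch below branches on it exactly the way the memcg consume_stock() change later in this pull does. The my_cache type and function are hypothetical, introduced only for this example.

#include <linux/gfp.h>
#include <linux/spinlock.h>

/* Hypothetical per-CPU object cache, defined only for this sketch. */
struct my_cache {
	spinlock_t lock;
	unsigned int nr_objs;
};

static bool my_cache_refill(struct my_cache *c, gfp_t gfp_mask)
{
	unsigned long flags;

	if (!spin_trylock_irqsave(&c->lock, flags)) {
		/*
		 * A gfp mask without __GFP_RECLAIM bits (e.g. from
		 * try_alloc_pages()-style callers) means we may be in
		 * NMI/IRQ or under a raw lock: give up instead of spinning.
		 */
		if (!gfpflags_allow_spinning(gfp_mask))
			return false;
		spin_lock_irqsave(&c->lock, flags);
	}
	c->nr_objs++;		/* stand-in for real refill work */
	spin_unlock_irqrestore(&c->lock, flags);
	return true;
}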
@@ -335,6 +354,9 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp,
 }
 #define alloc_page_vma(...)    alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__))
 
+struct page *try_alloc_pages_noprof(int nid, unsigned int order);
+#define try_alloc_pages(...)   alloc_hooks(try_alloc_pages_noprof(__VA_ARGS__))
+
 extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order);
 #define __get_free_pages(...)  alloc_hooks(get_free_pages_noprof(__VA_ARGS__))

@@ -357,6 +379,7 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mask,
        __get_free_pages((gfp_mask) | GFP_DMA, (order))
 
 extern void __free_pages(struct page *page, unsigned int order);
+extern void free_pages_nolock(struct page *page, unsigned int order);
 extern void free_pages(unsigned long addr, unsigned int order);
 
 #define __free_page(page)      __free_pages((page), 0)
include/linux/local_lock.h

@@ -51,6 +51,76 @@
 #define local_unlock_irqrestore(lock, flags)   \
        __local_unlock_irqrestore(lock, flags)
 
+/**
+ * localtry_lock_init - Runtime initialize a lock instance
+ */
+#define localtry_lock_init(lock)       __localtry_lock_init(lock)
+
+/**
+ * localtry_lock - Acquire a per CPU local lock
+ * @lock:      The lock variable
+ */
+#define localtry_lock(lock)    __localtry_lock(lock)
+
+/**
+ * localtry_lock_irq - Acquire a per CPU local lock and disable interrupts
+ * @lock:      The lock variable
+ */
+#define localtry_lock_irq(lock)        __localtry_lock_irq(lock)
+
+/**
+ * localtry_lock_irqsave - Acquire a per CPU local lock, save and disable
+ *                         interrupts
+ * @lock:      The lock variable
+ * @flags:     Storage for interrupt flags
+ */
+#define localtry_lock_irqsave(lock, flags)     \
+       __localtry_lock_irqsave(lock, flags)
+
+/**
+ * localtry_trylock - Try to acquire a per CPU local lock.
+ * @lock:      The lock variable
+ *
+ * The function can be used in any context such as NMI or HARDIRQ. Due to
+ * locking constrains it will _always_ fail to acquire the lock in NMI or
+ * HARDIRQ context on PREEMPT_RT.
+ */
+#define localtry_trylock(lock) __localtry_trylock(lock)
+
+/**
+ * localtry_trylock_irqsave - Try to acquire a per CPU local lock, save and disable
+ *                            interrupts if acquired
+ * @lock:      The lock variable
+ * @flags:     Storage for interrupt flags
+ *
+ * The function can be used in any context such as NMI or HARDIRQ. Due to
+ * locking constrains it will _always_ fail to acquire the lock in NMI or
+ * HARDIRQ context on PREEMPT_RT.
+ */
+#define localtry_trylock_irqsave(lock, flags)  \
+       __localtry_trylock_irqsave(lock, flags)
+
+/**
+ * local_unlock - Release a per CPU local lock
+ * @lock:      The lock variable
+ */
+#define localtry_unlock(lock)  __localtry_unlock(lock)
+
+/**
+ * local_unlock_irq - Release a per CPU local lock and enable interrupts
+ * @lock:      The lock variable
+ */
+#define localtry_unlock_irq(lock)      __localtry_unlock_irq(lock)
+
+/**
+ * localtry_unlock_irqrestore - Release a per CPU local lock and restore
+ *                              interrupt flags
+ * @lock:      The lock variable
+ * @flags:     Interrupt flags to restore
+ */
+#define localtry_unlock_irqrestore(lock, flags)        \
+       __localtry_unlock_irqrestore(lock, flags)
+
 DEFINE_GUARD(local_lock, local_lock_t __percpu*,
             local_lock(_T),
             local_unlock(_T))
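A minimal sketch of the intended localtry_lock_t usage pattern, modeled on the memcg_stock_pcp conversion later in this pull; the pcp_stats structure and helpers are invented for illustration. The lock/unlock pairs behave like local_lock_t, while the trylock variants may fail (and on PREEMPT_RT always fail in NMI/hard-IRQ context), so callers need a fallback path.

#include <linux/local_lock.h>
#include <linux/percpu.h>

/* Hypothetical per-CPU statistics block, used only for this sketch. */
struct pcp_stats {
	localtry_lock_t lock;
	unsigned long events;
};

static DEFINE_PER_CPU(struct pcp_stats, pcp_stats) = {
	.lock = INIT_LOCALTRY_LOCK(lock),
};

/* Regular (process/softirq) context: always succeeds. */
static void stats_add(unsigned long n)
{
	unsigned long flags;

	localtry_lock_irqsave(&pcp_stats.lock, flags);
	this_cpu_add(pcp_stats.events, n);
	localtry_unlock_irqrestore(&pcp_stats.lock, flags);
}

/* Any context (NMI, hard IRQ, under raw locks): may fail, caller must cope. */
static bool stats_try_add(unsigned long n)
{
	unsigned long flags;

	if (!localtry_trylock_irqsave(&pcp_stats.lock, flags))
		return false;	/* defer or drop the update */
	this_cpu_add(pcp_stats.events, n);
	localtry_unlock_irqrestore(&pcp_stats.lock, flags);
	return true;
}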
include/linux/local_lock_internal.h

@@ -15,6 +15,11 @@ typedef struct {
 #endif
 } local_lock_t;
 
+typedef struct {
+       local_lock_t    llock;
+       unsigned int    acquired;
+} localtry_lock_t;
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define LOCAL_LOCK_DEBUG_INIT(lockname)       \
        .dep_map = {                            \

@@ -31,6 +36,13 @@ static inline void local_lock_acquire(local_lock_t *l)
        l->owner = current;
 }
 
+static inline void local_trylock_acquire(local_lock_t *l)
+{
+       lock_map_acquire_try(&l->dep_map);
+       DEBUG_LOCKS_WARN_ON(l->owner);
+       l->owner = current;
+}
+
 static inline void local_lock_release(local_lock_t *l)
 {
        DEBUG_LOCKS_WARN_ON(l->owner != current);

@@ -45,11 +57,13 @@ static inline void local_lock_debug_init(local_lock_t *l)
 #else /* CONFIG_DEBUG_LOCK_ALLOC */
 # define LOCAL_LOCK_DEBUG_INIT(lockname)
 static inline void local_lock_acquire(local_lock_t *l) { }
+static inline void local_trylock_acquire(local_lock_t *l) { }
 static inline void local_lock_release(local_lock_t *l) { }
 static inline void local_lock_debug_init(local_lock_t *l) { }
 #endif /* !CONFIG_DEBUG_LOCK_ALLOC */
 
 #define INIT_LOCAL_LOCK(lockname)      { LOCAL_LOCK_DEBUG_INIT(lockname) }
+#define INIT_LOCALTRY_LOCK(lockname)   { .llock = { LOCAL_LOCK_DEBUG_INIT(lockname.llock) }}
 
 #define __local_lock_init(lock)        \
 do {                                   \
@@ -118,6 +132,104 @@ do {                                   \
 #define __local_unlock_nested_bh(lock)         \
        local_lock_release(this_cpu_ptr(lock))
 
+/* localtry_lock_t variants */
+
+#define __localtry_lock_init(lock)                     \
+do {                                                   \
+       __local_lock_init(&(lock)->llock);              \
+       WRITE_ONCE((lock)->acquired, 0);                \
+} while (0)
+
+#define __localtry_lock(lock)                          \
+       do {                                            \
+               localtry_lock_t *lt;                    \
+               preempt_disable();                      \
+               lt = this_cpu_ptr(lock);                \
+               local_lock_acquire(&lt->llock);         \
+               WRITE_ONCE(lt->acquired, 1);            \
+       } while (0)
+
+#define __localtry_lock_irq(lock)                      \
+       do {                                            \
+               localtry_lock_t *lt;                    \
+               local_irq_disable();                    \
+               lt = this_cpu_ptr(lock);                \
+               local_lock_acquire(&lt->llock);         \
+               WRITE_ONCE(lt->acquired, 1);            \
+       } while (0)
+
+#define __localtry_lock_irqsave(lock, flags)           \
+       do {                                            \
+               localtry_lock_t *lt;                    \
+               local_irq_save(flags);                  \
+               lt = this_cpu_ptr(lock);                \
+               local_lock_acquire(&lt->llock);         \
+               WRITE_ONCE(lt->acquired, 1);            \
+       } while (0)
+
+#define __localtry_trylock(lock)                       \
+       ({                                              \
+               localtry_lock_t *lt;                    \
+               bool _ret;                              \
+                                                       \
+               preempt_disable();                      \
+               lt = this_cpu_ptr(lock);                \
+               if (!READ_ONCE(lt->acquired)) {         \
+                       WRITE_ONCE(lt->acquired, 1);    \
+                       local_trylock_acquire(&lt->llock);      \
+                       _ret = true;                    \
+               } else {                                \
+                       _ret = false;                   \
+                       preempt_enable();               \
+               }                                       \
+               _ret;                                   \
+       })
+
+#define __localtry_trylock_irqsave(lock, flags)        \
+       ({                                              \
+               localtry_lock_t *lt;                    \
+               bool _ret;                              \
+                                                       \
+               local_irq_save(flags);                  \
+               lt = this_cpu_ptr(lock);                \
+               if (!READ_ONCE(lt->acquired)) {         \
+                       WRITE_ONCE(lt->acquired, 1);    \
+                       local_trylock_acquire(&lt->llock);      \
+                       _ret = true;                    \
+               } else {                                \
+                       _ret = false;                   \
+                       local_irq_restore(flags);       \
+               }                                       \
+               _ret;                                   \
+       })
+
+#define __localtry_unlock(lock)                        \
+       do {                                            \
+               localtry_lock_t *lt;                    \
+               lt = this_cpu_ptr(lock);                \
+               WRITE_ONCE(lt->acquired, 0);            \
+               local_lock_release(&lt->llock);         \
+               preempt_enable();                       \
+       } while (0)
+
+#define __localtry_unlock_irq(lock)                    \
+       do {                                            \
+               localtry_lock_t *lt;                    \
+               lt = this_cpu_ptr(lock);                \
+               WRITE_ONCE(lt->acquired, 0);            \
+               local_lock_release(&lt->llock);         \
+               local_irq_enable();                     \
+       } while (0)
+
+#define __localtry_unlock_irqrestore(lock, flags)      \
+       do {                                            \
+               localtry_lock_t *lt;                    \
+               lt = this_cpu_ptr(lock);                \
+               WRITE_ONCE(lt->acquired, 0);            \
+               local_lock_release(&lt->llock);         \
+               local_irq_restore(flags);               \
+       } while (0)
+
 #else /* !CONFIG_PREEMPT_RT */
 
 /*
@@ -125,8 +237,10 @@ do {                                   \
  * critical section while staying preemptible.
  */
 typedef spinlock_t local_lock_t;
+typedef spinlock_t localtry_lock_t;
 
 #define INIT_LOCAL_LOCK(lockname)      __LOCAL_SPIN_LOCK_UNLOCKED((lockname))
+#define INIT_LOCALTRY_LOCK(lockname)   INIT_LOCAL_LOCK(lockname)
 
 #define __local_lock_init(l)           \
 do {                                   \

@@ -169,4 +283,36 @@ do {                                   \
        spin_unlock(this_cpu_ptr((lock)));      \
 } while (0)
 
+/* localtry_lock_t variants */
+
+#define __localtry_lock_init(lock)                     __local_lock_init(lock)
+#define __localtry_lock(lock)                          __local_lock(lock)
+#define __localtry_lock_irq(lock)                      __local_lock(lock)
+#define __localtry_lock_irqsave(lock, flags)           __local_lock_irqsave(lock, flags)
+#define __localtry_unlock(lock)                        __local_unlock(lock)
+#define __localtry_unlock_irq(lock)                    __local_unlock(lock)
+#define __localtry_unlock_irqrestore(lock, flags)      __local_unlock_irqrestore(lock, flags)
+
+#define __localtry_trylock(lock)                       \
+       ({                                              \
+               int __locked;                           \
+                                                       \
+               if (in_nmi() | in_hardirq()) {          \
+                       __locked = 0;                   \
+               } else {                                \
+                       migrate_disable();              \
+                       __locked = spin_trylock(this_cpu_ptr((lock)));  \
+                       if (!__locked)                  \
+                               migrate_enable();       \
+               }                                       \
+               __locked;                               \
+       })
+
+#define __localtry_trylock_irqsave(lock, flags)        \
+       ({                                              \
+               typecheck(unsigned long, flags);        \
+               flags = 0;                              \
+               __localtry_trylock(lock);               \
+       })
+
 #endif /* CONFIG_PREEMPT_RT */
include/linux/mm_types.h

@@ -99,6 +99,10 @@ struct page {
                                /* Or, free page */
                                struct list_head buddy_list;
                                struct list_head pcp_list;
+                               struct {
+                                       struct llist_node pcp_llist;
+                                       unsigned int order;
+                               };
                        };
                        /* See page-flags.h for PAGE_MAPPING_FLAGS */
                        struct address_space *mapping;
include/linux/mmzone.h

@@ -972,6 +972,9 @@ struct zone {
        /* Primarily protects free_area */
        spinlock_t              lock;
 
+       /* Pages to be freed when next trylock succeeds */
+       struct llist_head       trylock_free_pages;
+
        /* Write-intensive fields used by compaction and vmstats. */
        CACHELINE_PADDING(_pad2_);
kernel/bpf/arena.c

@@ -287,7 +287,7 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
                        return VM_FAULT_SIGSEGV;
 
        /* Account into memcg of the process that created bpf_arena */
-       ret = bpf_map_alloc_pages(map, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, 1, &page);
+       ret = bpf_map_alloc_pages(map, NUMA_NO_NODE, 1, &page);
        if (ret) {
                range_tree_set(&arena->rt, vmf->pgoff, 1);
                return VM_FAULT_SIGSEGV;

@@ -465,8 +465,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt,
        if (ret)
                goto out_free_pages;
 
-       ret = bpf_map_alloc_pages(&arena->map, GFP_KERNEL | __GFP_ZERO,
-                                 node_id, page_cnt, pages);
+       ret = bpf_map_alloc_pages(&arena->map, node_id, page_cnt, pages);
        if (ret)
                goto out;
 
kernel/bpf/syscall.c

@@ -569,7 +569,24 @@ static void bpf_map_release_memcg(struct bpf_map *map)
 }
 #endif
 
-int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
+static bool can_alloc_pages(void)
+{
+       return preempt_count() == 0 && !irqs_disabled() &&
+               !IS_ENABLED(CONFIG_PREEMPT_RT);
+}
+
+static struct page *__bpf_alloc_page(int nid)
+{
+       if (!can_alloc_pages())
+               return try_alloc_pages(nid, 0);
+
+       return alloc_pages_node(nid,
+                               GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT
+                               | __GFP_NOWARN,
+                               0);
+}
+
+int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
                        unsigned long nr_pages, struct page **pages)
 {
        unsigned long i, j;

@@ -582,14 +599,14 @@ int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
        old_memcg = set_active_memcg(memcg);
 #endif
        for (i = 0; i < nr_pages; i++) {
-               pg = alloc_pages_node(nid, gfp | __GFP_ACCOUNT, 0);
+               pg = __bpf_alloc_page(nid);
 
                if (pg) {
                        pages[i] = pg;
                        continue;
                }
                for (j = 0; j < i; j++)
-                       __free_page(pages[j]);
+                       free_pages_nolock(pages[j], 0);
                ret = -ENOMEM;
                break;
        }
lib/stackdepot.c

@@ -591,7 +591,8 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
        depot_stack_handle_t handle = 0;
        struct page *page = NULL;
        void *prealloc = NULL;
-       bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC;
+       bool allow_spin = gfpflags_allow_spinning(alloc_flags);
+       bool can_alloc = (depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC) && allow_spin;
        unsigned long flags;
        u32 hash;

@@ -630,7 +631,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
                prealloc = page_address(page);
        }
 
-       if (in_nmi()) {
+       if (in_nmi() || !allow_spin) {
                /* We can never allocate in NMI context. */
                WARN_ON_ONCE(can_alloc);
                /* Best effort; bail if we fail to take the lock. */

@@ -671,7 +672,10 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
 exit:
        if (prealloc) {
                /* Stack depot didn't use this memory, free it. */
-               free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER);
+               if (!allow_spin)
+                       free_pages_nolock(virt_to_page(prealloc), DEPOT_POOL_ORDER);
+               else
+                       free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER);
        }
        if (found)
                handle = found->handle.handle;
mm/internal.h

@@ -1198,6 +1198,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 #define ALLOC_NOFRAGMENT         0x0
 #endif
 #define ALLOC_HIGHATOMIC       0x200 /* Allows access to MIGRATE_HIGHATOMIC */
+#define ALLOC_TRYLOCK          0x400 /* Only use spin_trylock in allocation path */
 #define ALLOC_KSWAPD           0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
 
 /* Flags that allow allocations below the min watermark. */
mm/memcontrol.c

@@ -1739,7 +1739,7 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
 }
 
 struct memcg_stock_pcp {
-       local_lock_t stock_lock;
+       localtry_lock_t stock_lock;
        struct mem_cgroup *cached; /* this never be root cgroup */
        unsigned int nr_pages;
 
@@ -1754,7 +1754,7 @@ struct memcg_stock_pcp {
 #define FLUSHING_CACHED_CHARGE 0
 };
 static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = {
-       .stock_lock = INIT_LOCAL_LOCK(stock_lock),
+       .stock_lock = INIT_LOCALTRY_LOCK(stock_lock),
 };
 static DEFINE_MUTEX(percpu_charge_mutex);
 
@@ -1766,6 +1766,7 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
  * consume_stock: Try to consume stocked charge on this cpu.
  * @memcg: memcg to consume from.
  * @nr_pages: how many pages to charge.
+ * @gfp_mask: allocation mask.
  *
  * The charges will only happen if @memcg matches the current cpu's memcg
  * stock, and at least @nr_pages are available in that stock. Failure to
@@ -1773,7 +1774,8 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
  *
  * returns true if successful, false otherwise.
  */
-static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
+                         gfp_t gfp_mask)
 {
        struct memcg_stock_pcp *stock;
        unsigned int stock_pages;
@@ -1783,7 +1785,11 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
        if (nr_pages > MEMCG_CHARGE_BATCH)
                return ret;
 
-       local_lock_irqsave(&memcg_stock.stock_lock, flags);
+       if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) {
+               if (!gfpflags_allow_spinning(gfp_mask))
+                       return ret;
+               localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
+       }
 
        stock = this_cpu_ptr(&memcg_stock);
        stock_pages = READ_ONCE(stock->nr_pages);
@@ -1792,7 +1798,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
                ret = true;
        }
 
-       local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+       localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 
        return ret;
 }
@@ -1831,14 +1837,14 @@ static void drain_local_stock(struct work_struct *dummy)
         * drain_stock races is that we always operate on local CPU stock
         * here with IRQ disabled
         */
-       local_lock_irqsave(&memcg_stock.stock_lock, flags);
+       localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
 
        stock = this_cpu_ptr(&memcg_stock);
        old = drain_obj_stock(stock);
        drain_stock(stock);
        clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
 
-       local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+       localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
        obj_cgroup_put(old);
 }
 
@@ -1868,9 +1874,20 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
        unsigned long flags;
 
-       local_lock_irqsave(&memcg_stock.stock_lock, flags);
+       if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) {
+               /*
+                * In case of unlikely failure to lock percpu stock_lock
+                * uncharge memcg directly.
+                */
+               if (mem_cgroup_is_root(memcg))
+                       return;
+               page_counter_uncharge(&memcg->memory, nr_pages);
+               if (do_memsw_account())
+                       page_counter_uncharge(&memcg->memsw, nr_pages);
+               return;
+       }
        __refill_stock(memcg, nr_pages);
-       local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+       localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 }
 
 /*
@@ -1927,9 +1944,9 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
        stock = &per_cpu(memcg_stock, cpu);
 
        /* drain_obj_stock requires stock_lock */
-       local_lock_irqsave(&memcg_stock.stock_lock, flags);
+       localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
        old = drain_obj_stock(stock);
-       local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+       localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 
        drain_stock(stock);
        obj_cgroup_put(old);
@@ -2222,9 +2239,13 @@ int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
        unsigned long pflags;
 
 retry:
-       if (consume_stock(memcg, nr_pages))
+       if (consume_stock(memcg, nr_pages, gfp_mask))
                return 0;
 
+       if (!gfpflags_allow_spinning(gfp_mask))
+               /* Avoid the refill and flush of the older stock */
+               batch = nr_pages;
+
        if (!do_memsw_account() ||
            page_counter_try_charge(&memcg->memsw, batch, &counter)) {
                if (page_counter_try_charge(&memcg->memory, batch, &counter))

@@ -2708,7 +2729,7 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
        unsigned long flags;
        int *bytes;
 
-       local_lock_irqsave(&memcg_stock.stock_lock, flags);
+       localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
        stock = this_cpu_ptr(&memcg_stock);
 
        /*

@@ -2761,7 +2782,7 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
        if (nr)
                __mod_objcg_mlstate(objcg, pgdat, idx, nr);
 
-       local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+       localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
        obj_cgroup_put(old);
 }
 
@@ -2771,7 +2792,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
        unsigned long flags;
        bool ret = false;
 
-       local_lock_irqsave(&memcg_stock.stock_lock, flags);
+       localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
 
        stock = this_cpu_ptr(&memcg_stock);
        if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) {

@@ -2779,7 +2800,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
                ret = true;
        }
 
-       local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+       localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 
        return ret;
 }

@@ -2871,7 +2892,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
        unsigned long flags;
        unsigned int nr_pages = 0;
 
-       local_lock_irqsave(&memcg_stock.stock_lock, flags);
+       localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
 
        stock = this_cpu_ptr(&memcg_stock);
        if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */

@@ -2889,7 +2910,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
                stock->nr_bytes &= (PAGE_SIZE - 1);
        }
 
-       local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+       localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
        obj_cgroup_put(old);
 
        if (nr_pages)
mm/page_alloc.c
@@ -88,6 +88,9 @@ typedef int __bitwise fpi_t;
  */
 #define FPI_TO_TAIL            ((__force fpi_t)BIT(1))
 
+/* Free the page without taking locks. Rely on trylock only. */
+#define FPI_TRYLOCK            ((__force fpi_t)BIT(2))
+
 /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
 static DEFINE_MUTEX(pcp_batch_high_lock);
 #define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8)
@@ -1249,13 +1252,44 @@ static void split_large_buddy(struct zone *zone, struct page *page,
        } while (1);
 }
 
+static void add_page_to_zone_llist(struct zone *zone, struct page *page,
+                                  unsigned int order)
+{
+       /* Remember the order */
+       page->order = order;
+       /* Add the page to the free list */
+       llist_add(&page->pcp_llist, &zone->trylock_free_pages);
+}
+
 static void free_one_page(struct zone *zone, struct page *page,
                          unsigned long pfn, unsigned int order,
                          fpi_t fpi_flags)
 {
+       struct llist_head *llhead;
        unsigned long flags;
 
-       spin_lock_irqsave(&zone->lock, flags);
+       if (!spin_trylock_irqsave(&zone->lock, flags)) {
+               if (unlikely(fpi_flags & FPI_TRYLOCK)) {
+                       add_page_to_zone_llist(zone, page, order);
+                       return;
+               }
+               spin_lock_irqsave(&zone->lock, flags);
+       }
+
+       /* The lock succeeded. Process deferred pages. */
+       llhead = &zone->trylock_free_pages;
+       if (unlikely(!llist_empty(llhead) && !(fpi_flags & FPI_TRYLOCK))) {
+               struct llist_node *llnode;
+               struct page *p, *tmp;
+
+               llnode = llist_del_all(llhead);
+               llist_for_each_entry_safe(p, tmp, llnode, pcp_llist) {
+                       unsigned int p_order = p->order;
+
+                       split_large_buddy(zone, p, page_to_pfn(p), p_order, fpi_flags);
+                       __count_vm_events(PGFREE, 1 << p_order);
+               }
+       }
        split_large_buddy(zone, page, pfn, order, fpi_flags);
        spin_unlock_irqrestore(&zone->lock, flags);
 
@@ -2307,7 +2341,11 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
        unsigned long flags;
        int i;
 
-       spin_lock_irqsave(&zone->lock, flags);
+       if (!spin_trylock_irqsave(&zone->lock, flags)) {
+               if (unlikely(alloc_flags & ALLOC_TRYLOCK))
+                       return 0;
+               spin_lock_irqsave(&zone->lock, flags);
+       }
        for (i = 0; i < count; ++i) {
                struct page *page = __rmqueue(zone, order, migratetype,
                                                        alloc_flags);
@@ -2595,7 +2633,7 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone,
 
 static void free_frozen_page_commit(struct zone *zone,
                struct per_cpu_pages *pcp, struct page *page, int migratetype,
-               unsigned int order)
+               unsigned int order, fpi_t fpi_flags)
 {
        int high, batch;
        int pindex;

@@ -2630,6 +2668,14 @@ static void free_frozen_page_commit(struct zone *zone,
        }
        if (pcp->free_count < (batch << CONFIG_PCP_BATCH_SCALE_MAX))
                pcp->free_count += (1 << order);
+
+       if (unlikely(fpi_flags & FPI_TRYLOCK)) {
+               /*
+                * Do not attempt to take a zone lock. Let pcp->count get
+                * over high mark temporarily.
+                */
+               return;
+       }
        high = nr_pcp_high(pcp, zone, batch, free_high);
        if (pcp->count >= high) {
                free_pcppages_bulk(zone, nr_pcp_free(pcp, batch, high, free_high),

@@ -2644,7 +2690,8 @@ static void free_frozen_page_commit(struct zone *zone,
 /*
  * Free a pcp page
  */
-void free_frozen_pages(struct page *page, unsigned int order)
+static void __free_frozen_pages(struct page *page, unsigned int order,
+                               fpi_t fpi_flags)
 {
        unsigned long __maybe_unused UP_flags;
        struct per_cpu_pages *pcp;

@@ -2653,7 +2700,7 @@ void free_frozen_pages(struct page *page, unsigned int order)
        int migratetype;
 
        if (!pcp_allowed_order(order)) {
-               __free_pages_ok(page, order, FPI_NONE);
+               __free_pages_ok(page, order, fpi_flags);
                return;
        }
 
@@ -2671,23 +2718,33 @@ void free_frozen_pages(struct page *page, unsigned int order)
        migratetype = get_pfnblock_migratetype(page, pfn);
        if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
                if (unlikely(is_migrate_isolate(migratetype))) {
-                       free_one_page(zone, page, pfn, order, FPI_NONE);
+                       free_one_page(zone, page, pfn, order, fpi_flags);
                        return;
                }
                migratetype = MIGRATE_MOVABLE;
        }
 
+       if (unlikely((fpi_flags & FPI_TRYLOCK) && IS_ENABLED(CONFIG_PREEMPT_RT)
+                    && (in_nmi() || in_hardirq()))) {
+               add_page_to_zone_llist(zone, page, order);
+               return;
+       }
        pcp_trylock_prepare(UP_flags);
        pcp = pcp_spin_trylock(zone->per_cpu_pageset);
        if (pcp) {
-               free_frozen_page_commit(zone, pcp, page, migratetype, order);
+               free_frozen_page_commit(zone, pcp, page, migratetype, order, fpi_flags);
                pcp_spin_unlock(pcp);
        } else {
-               free_one_page(zone, page, pfn, order, FPI_NONE);
+               free_one_page(zone, page, pfn, order, fpi_flags);
        }
        pcp_trylock_finish(UP_flags);
 }
+
+void free_frozen_pages(struct page *page, unsigned int order)
+{
+       __free_frozen_pages(page, order, FPI_NONE);
+}
 
 /*
  * Free a batch of folios
  */

@@ -2776,7 +2833,7 @@ void free_unref_folios(struct folio_batch *folios)
 
                trace_mm_page_free_batched(&folio->page);
                free_frozen_page_commit(zone, pcp, &folio->page, migratetype,
-                               order);
+                               order, FPI_NONE);
        }
 
        if (pcp) {
@@ -2907,7 +2964,11 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
 
        do {
                page = NULL;
-               spin_lock_irqsave(&zone->lock, flags);
+               if (!spin_trylock_irqsave(&zone->lock, flags)) {
+                       if (unlikely(alloc_flags & ALLOC_TRYLOCK))
+                               return NULL;
+                       spin_lock_irqsave(&zone->lock, flags);
+               }
                if (alloc_flags & ALLOC_HIGHATOMIC)
                        page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
                if (!page) {
@@ -4512,7 +4573,12 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
 
        might_alloc(gfp_mask);
 
-       if (should_fail_alloc_page(gfp_mask, order))
+       /*
+        * Don't invoke should_fail logic, since it may call
+        * get_random_u32() and printk() which need to spin_lock.
+        */
+       if (!(*alloc_flags & ALLOC_TRYLOCK) &&
+           should_fail_alloc_page(gfp_mask, order))
                return false;
 
        *alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, *alloc_flags);
@@ -4810,9 +4876,10 @@ unsigned long get_zeroed_page_noprof(gfp_t gfp_mask)
 EXPORT_SYMBOL(get_zeroed_page_noprof);
 
 /**
- * __free_pages - Free pages allocated with alloc_pages().
+ * ___free_pages - Free pages allocated with alloc_pages().
  * @page: The page pointer returned from alloc_pages().
  * @order: The order of the allocation.
+ * @fpi_flags: Free Page Internal flags.
  *
  * This function can free multi-page allocations that are not compound
  * pages. It does not check that the @order passed in matches that of

@@ -4829,22 +4896,37 @@ EXPORT_SYMBOL(get_zeroed_page_noprof);
  * Context: May be called in interrupt context or while holding a normal
  * spinlock, but not in NMI context or while holding a raw spinlock.
  */
-void __free_pages(struct page *page, unsigned int order)
+static void ___free_pages(struct page *page, unsigned int order,
+                         fpi_t fpi_flags)
 {
        /* get PageHead before we drop reference */
        int head = PageHead(page);
        struct alloc_tag *tag = pgalloc_tag_get(page);
 
        if (put_page_testzero(page))
-               free_frozen_pages(page, order);
+               __free_frozen_pages(page, order, fpi_flags);
        else if (!head) {
                pgalloc_tag_sub_pages(tag, (1 << order) - 1);
                while (order-- > 0)
-                       free_frozen_pages(page + (1 << order), order);
+                       __free_frozen_pages(page + (1 << order), order,
+                                           fpi_flags);
        }
 }
+void __free_pages(struct page *page, unsigned int order)
+{
+       ___free_pages(page, order, FPI_NONE);
+}
 EXPORT_SYMBOL(__free_pages);
 
+/*
+ * Can be called while holding raw_spin_lock or from IRQ and NMI for any
+ * page type (not only those that came from try_alloc_pages)
+ */
+void free_pages_nolock(struct page *page, unsigned int order)
+{
+       ___free_pages(page, order, FPI_TRYLOCK);
+}
+
 void free_pages(unsigned long addr, unsigned int order)
 {
        if (addr != 0) {
@@ -7081,3 +7163,94 @@ static bool __free_unaccepted(struct page *page)
 }
 
 #endif /* CONFIG_UNACCEPTED_MEMORY */
+
+/**
+ * try_alloc_pages - opportunistic reentrant allocation from any context
+ * @nid: node to allocate from
+ * @order: allocation order size
+ *
+ * Allocates pages of a given order from the given node. This is safe to
+ * call from any context (from atomic, NMI, and also reentrant
+ * allocator -> tracepoint -> try_alloc_pages_noprof).
+ * Allocation is best effort and to be expected to fail easily so nobody should
+ * rely on the success. Failures are not reported via warn_alloc().
+ * See always fail conditions below.
+ *
+ * Return: allocated page or NULL on failure.
+ */
+struct page *try_alloc_pages_noprof(int nid, unsigned int order)
+{
+       /*
+        * Do not specify __GFP_DIRECT_RECLAIM, since direct claim is not allowed.
+        * Do not specify __GFP_KSWAPD_RECLAIM either, since wake up of kswapd
+        * is not safe in arbitrary context.
+        *
+        * These two are the conditions for gfpflags_allow_spinning() being true.
+        *
+        * Specify __GFP_NOWARN since failing try_alloc_pages() is not a reason
+        * to warn. Also warn would trigger printk() which is unsafe from
+        * various contexts. We cannot use printk_deferred_enter() to mitigate,
+        * since the running context is unknown.
+        *
+        * Specify __GFP_ZERO to make sure that call to kmsan_alloc_page() below
+        * is safe in any context. Also zeroing the page is mandatory for
+        * BPF use cases.
+        *
+        * Though __GFP_NOMEMALLOC is not checked in the code path below,
+        * specify it here to highlight that try_alloc_pages()
+        * doesn't want to deplete reserves.
+        */
+       gfp_t alloc_gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_NOMEMALLOC
+                       | __GFP_ACCOUNT;
+       unsigned int alloc_flags = ALLOC_TRYLOCK;
+       struct alloc_context ac = { };
+       struct page *page;
+
+       /*
+        * In PREEMPT_RT spin_trylock() will call raw_spin_lock() which is
+        * unsafe in NMI. If spin_trylock() is called from hard IRQ the current
+        * task may be waiting for one rt_spin_lock, but rt_spin_trylock() will
+        * mark the task as the owner of another rt_spin_lock which will
+        * confuse PI logic, so return immediately if called form hard IRQ or
+        * NMI.
+        *
+        * Note, irqs_disabled() case is ok. This function can be called
+        * from raw_spin_lock_irqsave region.
+        */
+       if (IS_ENABLED(CONFIG_PREEMPT_RT) && (in_nmi() || in_hardirq()))
+               return NULL;
+       if (!pcp_allowed_order(order))
+               return NULL;
+
+#ifdef CONFIG_UNACCEPTED_MEMORY
+       /* Bailout, since try_to_accept_memory_one() needs to take a lock */
+       if (has_unaccepted_memory())
+               return NULL;
+#endif
+       /* Bailout, since _deferred_grow_zone() needs to take a lock */
+       if (deferred_pages_enabled())
+               return NULL;
+
+       if (nid == NUMA_NO_NODE)
+               nid = numa_node_id();
+
+       prepare_alloc_pages(alloc_gfp, order, nid, NULL, &ac,
+                           &alloc_gfp, &alloc_flags);
+
+       /*
+        * Best effort allocation from percpu free list.
+        * If it's empty attempt to spin_trylock zone->lock.
+        */
+       page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
+
+       /* Unlike regular alloc_pages() there is no __alloc_pages_slowpath(). */
+
+       if (memcg_kmem_online() && page &&
+           unlikely(__memcg_kmem_charge_page(page, alloc_gfp, order) != 0)) {
+               free_pages_nolock(page, order);
+               page = NULL;
+       }
+       trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
+       kmsan_alloc_page(page, order, alloc_gfp);
+       return page;
+}
mm/page_owner.c

@@ -294,7 +294,13 @@ void __reset_page_owner(struct page *page, unsigned short order)
        page_owner = get_page_owner(page_ext);
        alloc_handle = page_owner->handle;
 
-       handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
+       /*
+        * Do not specify GFP_NOWAIT to make gfpflags_allow_spinning() == false
+        * to prevent issues in stack_depot_save().
+        * This is similar to try_alloc_pages() gfp flags, but only used
+        * to signal stack_depot to avoid spin_locks.
+        */
+       handle = save_stack(__GFP_NOWARN);
        __update_page_owner_free_handle(page_ext, handle, order, current->pid,
                                        current->tgid, free_ts_nsec);
        page_ext_put(page_ext);