From d6c2f41eb99cdf41f050f5e41405d2ed143ff4ef Mon Sep 17 00:00:00 2001 From: Simon Buttgereit Date: Thu, 25 Sep 2025 09:57:26 +0200 Subject: [PATCH 1/7] libceph: fix log output race condition in OSD client OSD client logging has a problem in get_osd() and put_osd(). For one logging output refcount_read() is called twice. If recount value changes between both calls logging output is not consistent. This patch prints out only the resulting value. [ idryomov: don't make the log messages more verbose ] Signed-off-by: Simon Buttgereit Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- net/ceph/osd_client.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6664ea73ccf8..3667319b949d 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1280,8 +1280,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) static struct ceph_osd *get_osd(struct ceph_osd *osd) { if (refcount_inc_not_zero(&osd->o_ref)) { - dout("get_osd %p %d -> %d\n", osd, refcount_read(&osd->o_ref)-1, - refcount_read(&osd->o_ref)); + dout("get_osd %p -> %d\n", osd, refcount_read(&osd->o_ref)); return osd; } else { dout("get_osd %p FAIL\n", osd); @@ -1291,8 +1290,7 @@ static struct ceph_osd *get_osd(struct ceph_osd *osd) static void put_osd(struct ceph_osd *osd) { - dout("put_osd %p %d -> %d\n", osd, refcount_read(&osd->o_ref), - refcount_read(&osd->o_ref) - 1); + dout("put_osd %p -> %d\n", osd, refcount_read(&osd->o_ref) - 1); if (refcount_dec_and_test(&osd->o_ref)) { osd_cleanup(osd); kfree(osd); From d927a595ab2f6de4e10b3e3962bc70ab61d8f907 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Thu, 25 Sep 2025 12:45:12 +0200 Subject: [PATCH 2/7] ceph: add trace points to the MDS client This patch adds trace points to the Ceph filesystem MDS client: - request submission (CEPH_MSG_CLIENT_REQUEST) and completion (CEPH_MSG_CLIENT_REPLY) - capabilities (CEPH_MSG_CLIENT_CAPS) These are the central pieces that are useful for analyzing MDS latency/performance problems from the client's perspective. In the long run, all doutc() calls should be replaced with tracepoints. This way, the Ceph filesystem can be traced at any time (without spamming the kernel log). Additionally, trace points can be used in BPF programs (which can even deference the pointer parameters and extract more values). Signed-off-by: Max Kellermann Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 4 + fs/ceph/mds_client.c | 20 ++- fs/ceph/super.c | 3 + include/trace/events/ceph.h | 234 ++++++++++++++++++++++++++++++++++++ 4 files changed, 259 insertions(+), 2 deletions(-) create mode 100644 include/trace/events/ceph.h diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b1a8ff612c41..2f663972da99 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -18,6 +18,7 @@ #include "crypto.h" #include #include +#include /* * Capability management @@ -4452,6 +4453,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, session->s_mds, ceph_cap_op_name(op), vino.ino, vino.snap, inode, seq, issue_seq, mseq); + trace_ceph_handle_caps(mdsc, session, op, &vino, ceph_inode(inode), + seq, issue_seq, mseq); + mutex_lock(&session->s_mutex); if (!inode) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1740047aef0f..7e4eab824dae 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -24,6 +24,7 @@ #include #include #include +#include #define RECONNECT_MAX_SIZE (INT_MAX - PAGE_SIZE) @@ -3288,6 +3289,8 @@ static void complete_request(struct ceph_mds_client *mdsc, { req->r_end_latency = ktime_get(); + trace_ceph_mdsc_complete_request(mdsc, req); + if (req->r_callback) req->r_callback(mdsc, req); complete_all(&req->r_completion); @@ -3419,6 +3422,8 @@ static int __send_request(struct ceph_mds_session *session, { int err; + trace_ceph_mdsc_send_request(session, req); + err = __prepare_send_request(session, req, drop_cap_releases); if (!err) { ceph_msg_get(req->r_request); @@ -3470,6 +3475,8 @@ static void __do_request(struct ceph_mds_client *mdsc, } if (mdsc->mdsmap->m_epoch == 0) { doutc(cl, "no mdsmap, waiting for map\n"); + trace_ceph_mdsc_suspend_request(mdsc, session, req, + ceph_mdsc_suspend_reason_no_mdsmap); list_add(&req->r_wait, &mdsc->waiting_for_map); return; } @@ -3491,6 +3498,8 @@ static void __do_request(struct ceph_mds_client *mdsc, goto finish; } doutc(cl, "no mds or not active, waiting for map\n"); + trace_ceph_mdsc_suspend_request(mdsc, session, req, + ceph_mdsc_suspend_reason_no_active_mds); list_add(&req->r_wait, &mdsc->waiting_for_map); return; } @@ -3536,9 +3545,11 @@ static void __do_request(struct ceph_mds_client *mdsc, * it to the mdsc queue. */ if (session->s_state == CEPH_MDS_SESSION_REJECTED) { - if (ceph_test_mount_opt(mdsc->fsc, CLEANRECOVER)) + if (ceph_test_mount_opt(mdsc->fsc, CLEANRECOVER)) { + trace_ceph_mdsc_suspend_request(mdsc, session, req, + ceph_mdsc_suspend_reason_rejected); list_add(&req->r_wait, &mdsc->waiting_for_map); - else + } else err = -EACCES; goto out_session; } @@ -3552,6 +3563,8 @@ static void __do_request(struct ceph_mds_client *mdsc, if (random) req->r_resend_mds = mds; } + trace_ceph_mdsc_suspend_request(mdsc, session, req, + ceph_mdsc_suspend_reason_session); list_add(&req->r_wait, &session->s_waiting); goto out_session; } @@ -3652,6 +3665,7 @@ static void __wake_requests(struct ceph_mds_client *mdsc, list_del_init(&req->r_wait); doutc(cl, " wake request %p tid %llu\n", req, req->r_tid); + trace_ceph_mdsc_resume_request(mdsc, req); __do_request(mdsc, req); } } @@ -3678,6 +3692,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) req->r_session->s_mds == mds) { doutc(cl, " kicking tid %llu\n", req->r_tid); list_del_init(&req->r_wait); + trace_ceph_mdsc_resume_request(mdsc, req); __do_request(mdsc, req); } } @@ -3724,6 +3739,7 @@ int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, doutc(cl, "submit_request on %p for inode %p\n", req, dir); mutex_lock(&mdsc->mutex); __register_request(mdsc, req, dir); + trace_ceph_mdsc_submit_request(mdsc, req); __do_request(mdsc, req); err = req->r_err; mutex_unlock(&mdsc->mutex); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f6bf24b5c683..7c1c1dac320d 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -30,6 +30,9 @@ #include +#define CREATE_TRACE_POINTS +#include + static DEFINE_SPINLOCK(ceph_fsc_lock); static LIST_HEAD(ceph_fsc_list); diff --git a/include/trace/events/ceph.h b/include/trace/events/ceph.h new file mode 100644 index 000000000000..08cb0659fbfc --- /dev/null +++ b/include/trace/events/ceph.h @@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Ceph filesystem support module tracepoints + * + * Copyright (C) 2025 IONOS SE. All Rights Reserved. + * Written by Max Kellermann (max.kellermann@ionos.com) + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ceph + +#if !defined(_TRACE_CEPH_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_CEPH_H + +#include + +#define ceph_mdsc_suspend_reasons \ + EM(ceph_mdsc_suspend_reason_no_mdsmap, "no-mdsmap") \ + EM(ceph_mdsc_suspend_reason_no_active_mds, "no-active-mds") \ + EM(ceph_mdsc_suspend_reason_rejected, "rejected") \ + E_(ceph_mdsc_suspend_reason_session, "session") + +#ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY +#define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY + +#undef EM +#undef E_ +#define EM(a, b) a, +#define E_(a, b) a + +enum ceph_mdsc_suspend_reason { ceph_mdsc_suspend_reasons } __mode(byte); + +#endif + +/* + * Export enum symbols via userspace. + */ +#undef EM +#undef E_ +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define E_(a, b) TRACE_DEFINE_ENUM(a); + +ceph_mdsc_suspend_reasons; + +/* + * Now redefine the EM() and E_() macros to map the enums to the strings that + * will be printed in the output. + */ +#undef EM +#undef E_ +#define EM(a, b) { a, b }, +#define E_(a, b) { a, b } + +TRACE_EVENT(ceph_mdsc_submit_request, + TP_PROTO(struct ceph_mds_client *mdsc, + struct ceph_mds_request *req), + + TP_ARGS(mdsc, req), + + TP_STRUCT__entry( + __field(u64, tid) + __field(int, op) + __field(u64, ino) + __field(u64, snap) + ), + + TP_fast_assign( + struct inode *inode; + + __entry->tid = req->r_tid; + __entry->op = req->r_op; + + inode = req->r_inode; + if (inode == NULL && req->r_dentry) + inode = d_inode(req->r_dentry); + + if (inode) { + __entry->ino = ceph_ino(inode); + __entry->snap = ceph_snap(inode); + } else { + __entry->ino = __entry->snap = 0; + } + ), + + TP_printk("R=%llu op=%s ino=%llx,%llx", + __entry->tid, + ceph_mds_op_name(__entry->op), + __entry->ino, __entry->snap) +); + +TRACE_EVENT(ceph_mdsc_suspend_request, + TP_PROTO(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct ceph_mds_request *req, + enum ceph_mdsc_suspend_reason reason), + + TP_ARGS(mdsc, session, req, reason), + + TP_STRUCT__entry( + __field(u64, tid) + __field(int, op) + __field(int, mds) + __field(enum ceph_mdsc_suspend_reason, reason) + ), + + TP_fast_assign( + __entry->tid = req->r_tid; + __entry->op = req->r_op; + __entry->mds = session ? session->s_mds : -1; + __entry->reason = reason; + ), + + TP_printk("R=%llu op=%s reason=%s", + __entry->tid, + ceph_mds_op_name(__entry->op), + __print_symbolic(__entry->reason, ceph_mdsc_suspend_reasons)) +); + +TRACE_EVENT(ceph_mdsc_resume_request, + TP_PROTO(struct ceph_mds_client *mdsc, + struct ceph_mds_request *req), + + TP_ARGS(mdsc, req), + + TP_STRUCT__entry( + __field(u64, tid) + __field(int, op) + ), + + TP_fast_assign( + __entry->tid = req->r_tid; + __entry->op = req->r_op; + ), + + TP_printk("R=%llu op=%s", + __entry->tid, + ceph_mds_op_name(__entry->op)) +); + +TRACE_EVENT(ceph_mdsc_send_request, + TP_PROTO(struct ceph_mds_session *session, + struct ceph_mds_request *req), + + TP_ARGS(session, req), + + TP_STRUCT__entry( + __field(u64, tid) + __field(int, op) + __field(int, mds) + ), + + TP_fast_assign( + __entry->tid = req->r_tid; + __entry->op = req->r_op; + __entry->mds = session->s_mds; + ), + + TP_printk("R=%llu op=%s mds=%d", + __entry->tid, + ceph_mds_op_name(__entry->op), + __entry->mds) +); + +TRACE_EVENT(ceph_mdsc_complete_request, + TP_PROTO(struct ceph_mds_client *mdsc, + struct ceph_mds_request *req), + + TP_ARGS(mdsc, req), + + TP_STRUCT__entry( + __field(u64, tid) + __field(int, op) + __field(int, err) + __field(unsigned long, latency_ns) + ), + + TP_fast_assign( + __entry->tid = req->r_tid; + __entry->op = req->r_op; + __entry->err = req->r_err; + __entry->latency_ns = req->r_end_latency - req->r_start_latency; + ), + + TP_printk("R=%llu op=%s err=%d latency_ns=%lu", + __entry->tid, + ceph_mds_op_name(__entry->op), + __entry->err, + __entry->latency_ns) +); + +TRACE_EVENT(ceph_handle_caps, + TP_PROTO(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + int op, + const struct ceph_vino *vino, + struct ceph_inode_info *inode, + u32 seq, u32 mseq, u32 issue_seq), + + TP_ARGS(mdsc, session, op, vino, inode, seq, mseq, issue_seq), + + TP_STRUCT__entry( + __field(int, mds) + __field(int, op) + __field(u64, ino) + __field(u64, snap) + __field(u32, seq) + __field(u32, mseq) + __field(u32, issue_seq) + ), + + TP_fast_assign( + __entry->mds = session->s_mds; + __entry->op = op; + __entry->ino = vino->ino; + __entry->snap = vino->snap; + __entry->seq = seq; + __entry->mseq = mseq; + __entry->issue_seq = issue_seq; + ), + + TP_printk("mds=%d op=%s vino=%llx.%llx seq=%u iseq=%u mseq=%u", + __entry->mds, + ceph_cap_op_name(__entry->op), + __entry->ino, + __entry->snap, + __entry->seq, + __entry->issue_seq, + __entry->mseq) +); + +#undef EM +#undef E_ +#endif /* _TRACE_CEPH_H */ + +/* This part must be outside protection */ +#include From 87327d4eaaeafd3a2f6a1ffe84d6d25a96a2495d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 Nov 2025 15:44:04 +0100 Subject: [PATCH 3/7] ceph: Amend checking to fix `make W=1` build breakage In a few cases the code compares 32-bit value to a SIZE_MAX derived constant which is much higher than that value on 64-bit platforms, Clang, in particular, is not happy about this fs/ceph/snap.c:377:10: error: result of comparison of constant 2305843009213693948 with expression of type 'u32' (aka 'unsigned int') is always false [-Werror,-Wtautological-constant-out-of-range-compare] 377 | if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) | ~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix this by casting to size_t. Note, that possible replacement of SIZE_MAX by U32_MAX may lead to the behaviour changes on the corner cases. Signed-off-by: Andy Shevchenko Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- fs/ceph/snap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index c65f2b202b2b..521507ea8260 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -374,7 +374,7 @@ static int build_snap_context(struct ceph_mds_client *mdsc, /* alloc new snap context */ err = -ENOMEM; - if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) + if ((size_t)num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) goto fail; snapc = ceph_create_snap_context(num, GFP_NOFS); if (!snapc) From 04d8712b079327409b09dee628378f9583e2e035 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 Nov 2025 15:46:53 +0100 Subject: [PATCH 4/7] libceph: Amend checking to fix `make W=1` build breakage In a few cases the code compares 32-bit value to a SIZE_MAX derived constant which is much higher than that value on 64-bit platforms, Clang, in particular, is not happy about this net/ceph/osdmap.c:1441:10: error: result of comparison of constant 4611686018427387891 with expression of type 'u32' (aka 'unsigned int') is always false [-Werror,-Wtautological-constant-out-of-range-compare] 1441 | if (len > (SIZE_MAX - sizeof(*pg)) / sizeof(u32)) | ~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/ceph/osdmap.c:1624:10: error: result of comparison of constant 2305843009213693945 with expression of type 'u32' (aka 'unsigned int') is always false [-Werror,-Wtautological-constant-out-of-range-compare] 1624 | if (len > (SIZE_MAX - sizeof(*pg)) / (2 * sizeof(u32))) | ~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix this by casting to size_t. Note, that possible replacement of SIZE_MAX by U32_MAX may lead to the behaviour changes on the corner cases. Signed-off-by: Andy Shevchenko Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- net/ceph/osdmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index d245fa508e1c..329eeea2f922 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1438,7 +1438,7 @@ static struct ceph_pg_mapping *__decode_pg_temp(void **p, void *end, ceph_decode_32_safe(p, end, len, e_inval); if (len == 0 && incremental) return NULL; /* new_pg_temp: [] to remove */ - if (len > (SIZE_MAX - sizeof(*pg)) / sizeof(u32)) + if ((size_t)len > (SIZE_MAX - sizeof(*pg)) / sizeof(u32)) return ERR_PTR(-EINVAL); ceph_decode_need(p, end, len * sizeof(u32), e_inval); @@ -1619,7 +1619,7 @@ static struct ceph_pg_mapping *__decode_pg_upmap_items(void **p, void *end, u32 len, i; ceph_decode_32_safe(p, end, len, e_inval); - if (len > (SIZE_MAX - sizeof(*pg)) / (2 * sizeof(u32))) + if ((size_t)len > (SIZE_MAX - sizeof(*pg)) / (2 * sizeof(u32))) return ERR_PTR(-EINVAL); ceph_decode_need(p, end, 2 * len * sizeof(u32), e_inval); From 8c738512714e8c0aa18f8a10c072d5b01c83db39 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 2 Dec 2025 10:32:31 +0100 Subject: [PATCH 5/7] libceph: make decode_pool() more resilient against corrupted osdmaps If the osdmap is (maliciously) corrupted such that the encoded length of ceph_pg_pool envelope is less than what is expected for a particular encoding version, out-of-bounds reads may ensue because the only bounds check that is there is based on that length value. This patch adds explicit bounds checks for each field that is decoded or skipped. Cc: stable@vger.kernel.org Reported-by: ziming zhang Signed-off-by: Ilya Dryomov Reviewed-by: Xiubo Li Tested-by: ziming zhang --- net/ceph/osdmap.c | 116 +++++++++++++++++++++------------------------- 1 file changed, 52 insertions(+), 64 deletions(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 329eeea2f922..34b3ab59602f 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -806,51 +806,49 @@ static int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) ceph_decode_need(p, end, len, bad); pool_end = *p + len; + ceph_decode_need(p, end, 4 + 4 + 4, bad); pi->type = ceph_decode_8(p); pi->size = ceph_decode_8(p); pi->crush_ruleset = ceph_decode_8(p); pi->object_hash = ceph_decode_8(p); - pi->pg_num = ceph_decode_32(p); pi->pgp_num = ceph_decode_32(p); - *p += 4 + 4; /* skip lpg* */ - *p += 4; /* skip last_change */ - *p += 8 + 4; /* skip snap_seq, snap_epoch */ + /* lpg*, last_change, snap_seq, snap_epoch */ + ceph_decode_skip_n(p, end, 8 + 4 + 8 + 4, bad); /* skip snaps */ - num = ceph_decode_32(p); + ceph_decode_32_safe(p, end, num, bad); while (num--) { - *p += 8; /* snapid key */ - *p += 1 + 1; /* versions */ - len = ceph_decode_32(p); - *p += len; + /* snapid key, pool snap (with versions) */ + ceph_decode_skip_n(p, end, 8 + 2, bad); + ceph_decode_skip_string(p, end, bad); } - /* skip removed_snaps */ - num = ceph_decode_32(p); - *p += num * (8 + 8); + /* removed_snaps */ + ceph_decode_skip_map(p, end, 64, 64, bad); + ceph_decode_need(p, end, 8 + 8 + 4, bad); *p += 8; /* skip auid */ pi->flags = ceph_decode_64(p); *p += 4; /* skip crash_replay_interval */ if (ev >= 7) - pi->min_size = ceph_decode_8(p); + ceph_decode_8_safe(p, end, pi->min_size, bad); else pi->min_size = pi->size - pi->size / 2; if (ev >= 8) - *p += 8 + 8; /* skip quota_max_* */ + /* quota_max_* */ + ceph_decode_skip_n(p, end, 8 + 8, bad); if (ev >= 9) { - /* skip tiers */ - num = ceph_decode_32(p); - *p += num * 8; + /* tiers */ + ceph_decode_skip_set(p, end, 64, bad); + ceph_decode_need(p, end, 8 + 1 + 8 + 8, bad); *p += 8; /* skip tier_of */ *p += 1; /* skip cache_mode */ - pi->read_tier = ceph_decode_64(p); pi->write_tier = ceph_decode_64(p); } else { @@ -858,86 +856,76 @@ static int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) pi->write_tier = -1; } - if (ev >= 10) { - /* skip properties */ - num = ceph_decode_32(p); - while (num--) { - len = ceph_decode_32(p); - *p += len; /* key */ - len = ceph_decode_32(p); - *p += len; /* val */ - } - } + if (ev >= 10) + /* properties */ + ceph_decode_skip_map(p, end, string, string, bad); if (ev >= 11) { - /* skip hit_set_params */ - *p += 1 + 1; /* versions */ - len = ceph_decode_32(p); - *p += len; + /* hit_set_params (with versions) */ + ceph_decode_skip_n(p, end, 2, bad); + ceph_decode_skip_string(p, end, bad); - *p += 4; /* skip hit_set_period */ - *p += 4; /* skip hit_set_count */ + /* hit_set_period, hit_set_count */ + ceph_decode_skip_n(p, end, 4 + 4, bad); } if (ev >= 12) - *p += 4; /* skip stripe_width */ + /* stripe_width */ + ceph_decode_skip_32(p, end, bad); - if (ev >= 13) { - *p += 8; /* skip target_max_bytes */ - *p += 8; /* skip target_max_objects */ - *p += 4; /* skip cache_target_dirty_ratio_micro */ - *p += 4; /* skip cache_target_full_ratio_micro */ - *p += 4; /* skip cache_min_flush_age */ - *p += 4; /* skip cache_min_evict_age */ - } + if (ev >= 13) + /* target_max_*, cache_target_*, cache_min_* */ + ceph_decode_skip_n(p, end, 16 + 8 + 8, bad); - if (ev >= 14) { - /* skip erasure_code_profile */ - len = ceph_decode_32(p); - *p += len; - } + if (ev >= 14) + /* erasure_code_profile */ + ceph_decode_skip_string(p, end, bad); /* * last_force_op_resend_preluminous, will be overridden if the * map was encoded with RESEND_ON_SPLIT */ if (ev >= 15) - pi->last_force_request_resend = ceph_decode_32(p); + ceph_decode_32_safe(p, end, pi->last_force_request_resend, bad); else pi->last_force_request_resend = 0; if (ev >= 16) - *p += 4; /* skip min_read_recency_for_promote */ + /* min_read_recency_for_promote */ + ceph_decode_skip_32(p, end, bad); if (ev >= 17) - *p += 8; /* skip expected_num_objects */ + /* expected_num_objects */ + ceph_decode_skip_64(p, end, bad); if (ev >= 19) - *p += 4; /* skip cache_target_dirty_high_ratio_micro */ + /* cache_target_dirty_high_ratio_micro */ + ceph_decode_skip_32(p, end, bad); if (ev >= 20) - *p += 4; /* skip min_write_recency_for_promote */ + /* min_write_recency_for_promote */ + ceph_decode_skip_32(p, end, bad); if (ev >= 21) - *p += 1; /* skip use_gmt_hitset */ + /* use_gmt_hitset */ + ceph_decode_skip_8(p, end, bad); if (ev >= 22) - *p += 1; /* skip fast_read */ + /* fast_read */ + ceph_decode_skip_8(p, end, bad); - if (ev >= 23) { - *p += 4; /* skip hit_set_grade_decay_rate */ - *p += 4; /* skip hit_set_search_last_n */ - } + if (ev >= 23) + /* hit_set_grade_decay_rate, hit_set_search_last_n */ + ceph_decode_skip_n(p, end, 4 + 4, bad); if (ev >= 24) { - /* skip opts */ - *p += 1 + 1; /* versions */ - len = ceph_decode_32(p); - *p += len; + /* opts (with versions) */ + ceph_decode_skip_n(p, end, 2, bad); + ceph_decode_skip_string(p, end, bad); } if (ev >= 25) - pi->last_force_request_resend = ceph_decode_32(p); + ceph_decode_32_safe(p, end, pi->last_force_request_resend, bad); /* ignore the rest */ From 3680fc138e31d8a9e8e344d72c6692e921dbb4a3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 4 Dec 2025 22:51:04 -0800 Subject: [PATCH 6/7] ceph: stop selecting CRC32, CRYPTO, and CRYPTO_AES None of the CEPH_FS code directly requires CRC32, CRYPTO, or CRYPTO_AES. These options do get selected indirectly anyway via CEPH_LIB, which does need them, but there is no need for CEPH_FS to select them too. Signed-off-by: Eric Biggers Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index 3e7def3d31c1..3d64a316ca31 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -3,9 +3,6 @@ config CEPH_FS tristate "Ceph distributed file system" depends on INET select CEPH_LIB - select CRC32 - select CRYPTO_AES - select CRYPTO select NETFS_SUPPORT select FS_ENCRYPTION_ALGS if FS_ENCRYPTION default n From 21c1466ea25114871707d95745a16ebcf231e197 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 7 Dec 2025 17:39:44 +0100 Subject: [PATCH 7/7] rbd: stop selecting CRC32, CRYPTO, and CRYPTO_AES None of the RBD code directly requires CRC32, CRYPTO, or CRYPTO_AES. These options are needed by CEPH_LIB code and they are selected there directly. Signed-off-by: Ilya Dryomov Reviewed-by: Dongsheng Yang --- drivers/block/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 77d694448990..858320b6ebb7 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -316,9 +316,6 @@ config BLK_DEV_RBD tristate "Rados block device (RBD)" depends on INET && BLOCK select CEPH_LIB - select CRC32 - select CRYPTO_AES - select CRYPTO help Say Y here if you want include the Rados block device, which stripes a block device over objects stored in the Ceph distributed object