Some messenger improvements from Eric and Max, a patch to address the
 issue (which also affected userspace) of incorrect permissions being
 granted to users who have access to multiple different CephFS instances
 within the same cluster from Kotresh, and a bunch of assorted CephFS
 fixes from Slava.
 -----BEGIN PGP SIGNATURE-----
 
 iQFFBAABCAAxFiEEydHwtzie9C7TfviiSn/eOAIR84sFAmjpShsTHGlkcnlvbW92
 QGdtYWlsLmNvbQAKCRBKf944AhHzizoaB/C7qTw5Olh8NDpX8I+ljEO50XNduurf
 fp2eNn0ub5brhcvh8iACSPKE2oer/bDvv3b2SN9310GmBX3f7H2Ht5TeH2tGBRN0
 clg+C2DmY/2watHovo+ua7YAd+HiPH2XMbpeU38Pu1nEdmiU6cQ0YaOn8n2p+c1E
 bID0dMHWb4HTmFRURqWqKPDkM1fLHRxIVgyOMaov5vs0T7XdglwPja3S2W6epvqF
 hKSMSvO/j9qYlOsBM6G6IuHDMJomzBqOQKqsQqC4XZN6uXeaKPTLYRnzxKfJUEWj
 P5JTaum7NGGtfIs0L9wr6zpou/GY2zTFiyXhLZsLJMn894bBO5nArg==
 =wGIB
 -----END PGP SIGNATURE-----

Merge tag 'ceph-for-6.18-rc1' of https://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:

 - some messenger improvements (Eric and Max)

 - address an issue (which also affected userspace) of incorrect
   permissions being granted to users who have access to multiple
   different CephFS instances within the same cluster (Kotresh)

 - a bunch of assorted CephFS fixes (Slava)

* tag 'ceph-for-6.18-rc1' of https://github.com/ceph/ceph-client:
  ceph: add bug tracking system info to MAINTAINERS
  ceph: fix multifs mds auth caps issue
  ceph: cleanup in ceph_alloc_readdir_reply_buffer()
  ceph: fix potential NULL dereference issue in ceph_fill_trace()
  libceph: add empty check to ceph_con_get_out_msg()
  libceph: pass the message pointer instead of loading con->out_msg
  libceph: make ceph_con_get_out_msg() return the message pointer
  ceph: fix potential race condition on operations with CEPH_I_ODIRECT flag
  ceph: refactor wake_up_bit() pattern of calling
  ceph: fix potential race condition in ceph_ioctl_lazyio()
  ceph: fix overflowed constant issue in ceph_do_objects_copy()
  ceph: fix wrong sizeof argument issue in register_session()
  ceph: add checking of wait_for_completion_killable() return value
  ceph: make ceph_start_io_*() killable
  libceph: Use HMAC-SHA256 library instead of crypto_shash
pull/1354/merge
Linus Torvalds 2025-10-10 11:30:19 -07:00
commit 8bd9238e51
17 changed files with 323 additions and 248 deletions

View File

@ -5694,6 +5694,7 @@ M: Xiubo Li <xiubli@redhat.com>
L: ceph-devel@vger.kernel.org L: ceph-devel@vger.kernel.org
S: Supported S: Supported
W: http://ceph.com/ W: http://ceph.com/
B: https://tracker.ceph.com/
T: git https://github.com/ceph/ceph-client.git T: git https://github.com/ceph/ceph-client.git
F: include/linux/ceph/ F: include/linux/ceph/
F: include/linux/crush/ F: include/linux/crush/
@ -5705,6 +5706,7 @@ M: Ilya Dryomov <idryomov@gmail.com>
L: ceph-devel@vger.kernel.org L: ceph-devel@vger.kernel.org
S: Supported S: Supported
W: http://ceph.com/ W: http://ceph.com/
B: https://tracker.ceph.com/
T: git https://github.com/ceph/ceph-client.git T: git https://github.com/ceph/ceph-client.git
F: Documentation/filesystems/ceph.rst F: Documentation/filesystems/ceph.rst
F: fs/ceph/ F: fs/ceph/
@ -21357,6 +21359,7 @@ R: Dongsheng Yang <dongsheng.yang@easystack.cn>
L: ceph-devel@vger.kernel.org L: ceph-devel@vger.kernel.org
S: Supported S: Supported
W: http://ceph.com/ W: http://ceph.com/
B: https://tracker.ceph.com/
T: git https://github.com/ceph/ceph-client.git T: git https://github.com/ceph/ceph-client.git
F: Documentation/ABI/testing/sysfs-bus-rbd F: Documentation/ABI/testing/sysfs-bus-rbd
F: drivers/block/rbd.c F: drivers/block/rbd.c

View File

@ -1260,8 +1260,7 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
spin_unlock(&fsc->async_unlink_conflict_lock); spin_unlock(&fsc->async_unlink_conflict_lock);
spin_lock(&dentry->d_lock); spin_lock(&dentry->d_lock);
di->flags &= ~CEPH_DENTRY_ASYNC_UNLINK; clear_and_wake_up_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags);
wake_up_bit(&di->flags, CEPH_DENTRY_ASYNC_UNLINK_BIT);
spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_lock);
synchronize_rcu(); synchronize_rcu();

View File

@ -579,8 +579,7 @@ static void wake_async_create_waiters(struct inode *inode,
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) { if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) {
ci->i_ceph_flags &= ~CEPH_I_ASYNC_CREATE; clear_and_wake_up_bit(CEPH_ASYNC_CREATE_BIT, &ci->i_ceph_flags);
wake_up_bit(&ci->i_ceph_flags, CEPH_ASYNC_CREATE_BIT);
if (ci->i_ceph_flags & CEPH_I_ASYNC_CHECK_CAPS) { if (ci->i_ceph_flags & CEPH_I_ASYNC_CHECK_CAPS) {
ci->i_ceph_flags &= ~CEPH_I_ASYNC_CHECK_CAPS; ci->i_ceph_flags &= ~CEPH_I_ASYNC_CHECK_CAPS;
@ -762,8 +761,7 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode,
} }
spin_lock(&dentry->d_lock); spin_lock(&dentry->d_lock);
di->flags &= ~CEPH_DENTRY_ASYNC_CREATE; clear_and_wake_up_bit(CEPH_DENTRY_ASYNC_CREATE_BIT, &di->flags);
wake_up_bit(&di->flags, CEPH_DENTRY_ASYNC_CREATE_BIT);
spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_lock);
return ret; return ret;
@ -2121,10 +2119,10 @@ again:
if (ceph_inode_is_shutdown(inode)) if (ceph_inode_is_shutdown(inode))
return -ESTALE; return -ESTALE;
if (direct_lock) ret = direct_lock ? ceph_start_io_direct(inode) :
ceph_start_io_direct(inode);
else
ceph_start_io_read(inode); ceph_start_io_read(inode);
if (ret)
return ret;
if (!(fi->flags & CEPH_F_SYNC) && !direct_lock) if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
want |= CEPH_CAP_FILE_CACHE; want |= CEPH_CAP_FILE_CACHE;
@ -2277,7 +2275,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
(fi->flags & CEPH_F_SYNC)) (fi->flags & CEPH_F_SYNC))
return copy_splice_read(in, ppos, pipe, len, flags); return copy_splice_read(in, ppos, pipe, len, flags);
ceph_start_io_read(inode); ret = ceph_start_io_read(inode);
if (ret)
return ret;
want = CEPH_CAP_FILE_CACHE; want = CEPH_CAP_FILE_CACHE;
if (fi->fmode & CEPH_FILE_MODE_LAZY) if (fi->fmode & CEPH_FILE_MODE_LAZY)
@ -2356,10 +2356,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
direct_lock = true; direct_lock = true;
retry_snap: retry_snap:
if (direct_lock) err = direct_lock ? ceph_start_io_direct(inode) :
ceph_start_io_direct(inode);
else
ceph_start_io_write(inode); ceph_start_io_write(inode);
if (err)
goto out_unlocked;
if (iocb->ki_flags & IOCB_APPEND) { if (iocb->ki_flags & IOCB_APPEND) {
err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
@ -2878,7 +2878,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
struct ceph_object_id src_oid, dst_oid; struct ceph_object_id src_oid, dst_oid;
struct ceph_osd_client *osdc; struct ceph_osd_client *osdc;
struct ceph_osd_request *req; struct ceph_osd_request *req;
size_t bytes = 0; ssize_t bytes = 0;
u64 src_objnum, src_objoff, dst_objnum, dst_objoff; u64 src_objnum, src_objoff, dst_objnum, dst_objoff;
u32 src_objlen, dst_objlen; u32 src_objlen, dst_objlen;
u32 object_size = src_ci->i_layout.object_size; u32 object_size = src_ci->i_layout.object_size;
@ -2928,7 +2928,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
"OSDs don't support copy-from2; disabling copy offload\n"); "OSDs don't support copy-from2; disabling copy offload\n");
} }
doutc(cl, "returned %d\n", ret); doutc(cl, "returned %d\n", ret);
if (!bytes) if (bytes <= 0)
bytes = ret; bytes = ret;
goto out; goto out;
} }

View File

@ -1794,6 +1794,11 @@ retry_lookup:
goto done; goto done;
} }
if (unlikely(!in)) {
err = -EINVAL;
goto done;
}
/* attach proper inode */ /* attach proper inode */
if (d_really_is_negative(dn)) { if (d_really_is_negative(dn)) {
ceph_dir_clear_ordered(dir); ceph_dir_clear_ordered(dir);
@ -1829,6 +1834,12 @@ retry_lookup:
doutc(cl, " linking snapped dir %p to dn %p\n", in, doutc(cl, " linking snapped dir %p to dn %p\n", in,
req->r_dentry); req->r_dentry);
ceph_dir_clear_ordered(dir); ceph_dir_clear_ordered(dir);
if (unlikely(!in)) {
err = -EINVAL;
goto done;
}
ihold(in); ihold(in);
err = splice_dentry(&req->r_dentry, in); err = splice_dentry(&req->r_dentry, in);
if (err < 0) if (err < 0)

View File

@ -21,14 +21,23 @@
/* Call with exclusively locked inode->i_rwsem */ /* Call with exclusively locked inode->i_rwsem */
static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
{ {
bool is_odirect;
lockdep_assert_held_write(&inode->i_rwsem); lockdep_assert_held_write(&inode->i_rwsem);
if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) {
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
ci->i_ceph_flags &= ~CEPH_I_ODIRECT; /* ensure that bit state is consistent */
spin_unlock(&ci->i_ceph_lock); smp_mb__before_atomic();
inode_dio_wait(inode); is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
if (is_odirect) {
clear_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags);
/* ensure modified bit is visible */
smp_mb__after_atomic();
} }
spin_unlock(&ci->i_ceph_lock);
if (is_odirect)
inode_dio_wait(inode);
} }
/** /**
@ -47,20 +56,35 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
* Note that buffered writes and truncates both take a write lock on * Note that buffered writes and truncates both take a write lock on
* inode->i_rwsem, meaning that those are serialised w.r.t. the reads. * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
*/ */
void int ceph_start_io_read(struct inode *inode)
ceph_start_io_read(struct inode *inode)
{ {
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
bool is_odirect;
int err;
/* Be an optimist! */ /* Be an optimist! */
down_read(&inode->i_rwsem); err = down_read_killable(&inode->i_rwsem);
if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) if (err)
return; return err;
spin_lock(&ci->i_ceph_lock);
/* ensure that bit state is consistent */
smp_mb__before_atomic();
is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
spin_unlock(&ci->i_ceph_lock);
if (!is_odirect)
return 0;
up_read(&inode->i_rwsem); up_read(&inode->i_rwsem);
/* Slow path.... */ /* Slow path.... */
down_write(&inode->i_rwsem); err = down_write_killable(&inode->i_rwsem);
if (err)
return err;
ceph_block_o_direct(ci, inode); ceph_block_o_direct(ci, inode);
downgrade_write(&inode->i_rwsem); downgrade_write(&inode->i_rwsem);
return 0;
} }
/** /**
@ -83,11 +107,12 @@ ceph_end_io_read(struct inode *inode)
* Declare that a buffered write operation is about to start, and ensure * Declare that a buffered write operation is about to start, and ensure
* that we block all direct I/O. * that we block all direct I/O.
*/ */
void int ceph_start_io_write(struct inode *inode)
ceph_start_io_write(struct inode *inode)
{ {
down_write(&inode->i_rwsem); int err = down_write_killable(&inode->i_rwsem);
if (!err)
ceph_block_o_direct(ceph_inode(inode), inode); ceph_block_o_direct(ceph_inode(inode), inode);
return err;
} }
/** /**
@ -106,12 +131,22 @@ ceph_end_io_write(struct inode *inode)
/* Call with exclusively locked inode->i_rwsem */ /* Call with exclusively locked inode->i_rwsem */
static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
{ {
bool is_odirect;
lockdep_assert_held_write(&inode->i_rwsem); lockdep_assert_held_write(&inode->i_rwsem);
if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) {
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
ci->i_ceph_flags |= CEPH_I_ODIRECT; /* ensure that bit state is consistent */
smp_mb__before_atomic();
is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
if (!is_odirect) {
set_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags);
/* ensure modified bit is visible */
smp_mb__after_atomic();
}
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
if (!is_odirect) {
/* FIXME: unmap_mapping_range? */ /* FIXME: unmap_mapping_range? */
filemap_write_and_wait(inode->i_mapping); filemap_write_and_wait(inode->i_mapping);
} }
@ -133,20 +168,35 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
* Note that buffered writes and truncates both take a write lock on * Note that buffered writes and truncates both take a write lock on
* inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
*/ */
void int ceph_start_io_direct(struct inode *inode)
ceph_start_io_direct(struct inode *inode)
{ {
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
bool is_odirect;
int err;
/* Be an optimist! */ /* Be an optimist! */
down_read(&inode->i_rwsem); err = down_read_killable(&inode->i_rwsem);
if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) if (err)
return; return err;
spin_lock(&ci->i_ceph_lock);
/* ensure that bit state is consistent */
smp_mb__before_atomic();
is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
spin_unlock(&ci->i_ceph_lock);
if (is_odirect)
return 0;
up_read(&inode->i_rwsem); up_read(&inode->i_rwsem);
/* Slow path.... */ /* Slow path.... */
down_write(&inode->i_rwsem); err = down_write_killable(&inode->i_rwsem);
if (err)
return err;
ceph_block_buffered(ci, inode); ceph_block_buffered(ci, inode);
downgrade_write(&inode->i_rwsem); downgrade_write(&inode->i_rwsem);
return 0;
} }
/** /**

View File

@ -2,11 +2,13 @@
#ifndef _FS_CEPH_IO_H #ifndef _FS_CEPH_IO_H
#define _FS_CEPH_IO_H #define _FS_CEPH_IO_H
void ceph_start_io_read(struct inode *inode); #include <linux/compiler_attributes.h>
int __must_check ceph_start_io_read(struct inode *inode);
void ceph_end_io_read(struct inode *inode); void ceph_end_io_read(struct inode *inode);
void ceph_start_io_write(struct inode *inode); int __must_check ceph_start_io_write(struct inode *inode);
void ceph_end_io_write(struct inode *inode); void ceph_end_io_write(struct inode *inode);
void ceph_start_io_direct(struct inode *inode); int __must_check ceph_start_io_direct(struct inode *inode);
void ceph_end_io_direct(struct inode *inode); void ceph_end_io_direct(struct inode *inode);
#endif /* FS_CEPH_IO_H */ #endif /* FS_CEPH_IO_H */

View File

@ -246,21 +246,28 @@ static long ceph_ioctl_lazyio(struct file *file)
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc;
struct ceph_client *cl = mdsc->fsc->client; struct ceph_client *cl = mdsc->fsc->client;
bool is_file_already_lazy = false;
if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
fi->fmode |= CEPH_FILE_MODE_LAZY; fi->fmode |= CEPH_FILE_MODE_LAZY;
ci->i_nr_by_mode[ffs(CEPH_FILE_MODE_LAZY)]++; ci->i_nr_by_mode[ffs(CEPH_FILE_MODE_LAZY)]++;
__ceph_touch_fmode(ci, mdsc, fi->fmode); __ceph_touch_fmode(ci, mdsc, fi->fmode);
} else {
is_file_already_lazy = true;
}
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
if (is_file_already_lazy) {
doutc(cl, "file %p %p %llx.%llx already lazy\n", file, inode,
ceph_vinop(inode));
} else {
doutc(cl, "file %p %p %llx.%llx marked lazy\n", file, inode, doutc(cl, "file %p %p %llx.%llx marked lazy\n", file, inode,
ceph_vinop(inode)); ceph_vinop(inode));
ceph_check_caps(ci, 0); ceph_check_caps(ci, 0);
} else {
doutc(cl, "file %p %p %llx.%llx already lazy\n", file, inode,
ceph_vinop(inode));
} }
return 0; return 0;
} }

View File

@ -221,7 +221,10 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
if (err && err != -ERESTARTSYS) if (err && err != -ERESTARTSYS)
return err; return err;
wait_for_completion_killable(&req->r_safe_completion); err = wait_for_completion_killable(&req->r_safe_completion);
if (err)
return err;
return 0; return 0;
} }

View File

@ -979,14 +979,15 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
if (mds >= mdsc->max_sessions) { if (mds >= mdsc->max_sessions) {
int newmax = 1 << get_count_order(mds + 1); int newmax = 1 << get_count_order(mds + 1);
struct ceph_mds_session **sa; struct ceph_mds_session **sa;
size_t ptr_size = sizeof(struct ceph_mds_session *);
doutc(cl, "realloc to %d\n", newmax); doutc(cl, "realloc to %d\n", newmax);
sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); sa = kcalloc(newmax, ptr_size, GFP_NOFS);
if (!sa) if (!sa)
goto fail_realloc; goto fail_realloc;
if (mdsc->sessions) { if (mdsc->sessions) {
memcpy(sa, mdsc->sessions, memcpy(sa, mdsc->sessions,
mdsc->max_sessions * sizeof(void *)); mdsc->max_sessions * ptr_size);
kfree(mdsc->sessions); kfree(mdsc->sessions);
} }
mdsc->sessions = sa; mdsc->sessions = sa;
@ -2532,6 +2533,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options;
size_t size = sizeof(struct ceph_mds_reply_dir_entry); size_t size = sizeof(struct ceph_mds_reply_dir_entry);
unsigned int num_entries; unsigned int num_entries;
u64 bytes_count;
int order; int order;
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
@ -2540,7 +2542,11 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
num_entries = max(num_entries, 1U); num_entries = max(num_entries, 1U);
num_entries = min(num_entries, opt->max_readdir); num_entries = min(num_entries, opt->max_readdir);
order = get_order(size * num_entries); bytes_count = (u64)size * num_entries;
if (unlikely(bytes_count > ULONG_MAX))
bytes_count = ULONG_MAX;
order = get_order((unsigned long)bytes_count);
while (order >= 0) { while (order >= 0) {
rinfo->dir_entries = (void*)__get_free_pages(GFP_KERNEL | rinfo->dir_entries = (void*)__get_free_pages(GFP_KERNEL |
__GFP_NOWARN | __GFP_NOWARN |
@ -2550,7 +2556,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
break; break;
order--; order--;
} }
if (!rinfo->dir_entries) if (!rinfo->dir_entries || unlikely(order < 0))
return -ENOMEM; return -ENOMEM;
num_entries = (PAGE_SIZE << order) / size; num_entries = (PAGE_SIZE << order) / size;
@ -5649,11 +5655,19 @@ static int ceph_mds_auth_match(struct ceph_mds_client *mdsc,
u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid); u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid);
u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid); u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid);
struct ceph_client *cl = mdsc->fsc->client; struct ceph_client *cl = mdsc->fsc->client;
const char *fs_name = mdsc->fsc->mount_options->mds_namespace;
const char *spath = mdsc->fsc->mount_options->server_path; const char *spath = mdsc->fsc->mount_options->server_path;
bool gid_matched = false; bool gid_matched = false;
u32 gid, tlen, len; u32 gid, tlen, len;
int i, j; int i, j;
doutc(cl, "fsname check fs_name=%s match.fs_name=%s\n",
fs_name, auth->match.fs_name ? auth->match.fs_name : "");
if (auth->match.fs_name && strcmp(auth->match.fs_name, fs_name)) {
/* fsname mismatch, try next one */
return 0;
}
doutc(cl, "match.uid %lld\n", auth->match.uid); doutc(cl, "match.uid %lld\n", auth->match.uid);
if (auth->match.uid != MDS_AUTH_UID_ANY) { if (auth->match.uid != MDS_AUTH_UID_ANY) {
if (auth->match.uid != caller_uid) if (auth->match.uid != caller_uid)

View File

@ -353,10 +353,22 @@ struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p,
__decode_and_drop_type(p, end, u8, bad_ext); __decode_and_drop_type(p, end, u8, bad_ext);
} }
if (mdsmap_ev >= 8) { if (mdsmap_ev >= 8) {
u32 fsname_len;
/* enabled */ /* enabled */
ceph_decode_8_safe(p, end, m->m_enabled, bad_ext); ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
/* fs_name */ /* fs_name */
ceph_decode_skip_string(p, end, bad_ext); ceph_decode_32_safe(p, end, fsname_len, bad_ext);
/* validate fsname against mds_namespace */
if (!namespace_equals(mdsc->fsc->mount_options, *p,
fsname_len)) {
pr_warn_client(cl, "fsname %*pE doesn't match mds_namespace %s\n",
(int)fsname_len, (char *)*p,
mdsc->fsc->mount_options->mds_namespace);
goto bad;
}
/* skip fsname after validation */
ceph_decode_skip_n(p, end, fsname_len, bad);
} }
/* damaged */ /* damaged */
if (mdsmap_ev >= 9) { if (mdsmap_ev >= 9) {

View File

@ -246,20 +246,6 @@ static void canonicalize_path(char *path)
path[j] = '\0'; path[j] = '\0';
} }
/*
* Check if the mds namespace in ceph_mount_options matches
* the passed in namespace string. First time match (when
* ->mds_namespace is NULL) is treated specially, since
* ->mds_namespace needs to be initialized by the caller.
*/
static int namespace_equals(struct ceph_mount_options *fsopt,
const char *namespace, size_t len)
{
return !(fsopt->mds_namespace &&
(strlen(fsopt->mds_namespace) != len ||
strncmp(fsopt->mds_namespace, namespace, len)));
}
static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end, static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end,
struct fs_context *fc) struct fs_context *fc)
{ {

View File

@ -104,6 +104,20 @@ struct ceph_mount_options {
struct fscrypt_dummy_policy dummy_enc_policy; struct fscrypt_dummy_policy dummy_enc_policy;
}; };
/*
* Check if the mds namespace in ceph_mount_options matches
* the passed in namespace string. First time match (when
* ->mds_namespace is NULL) is treated specially, since
* ->mds_namespace needs to be initialized by the caller.
*/
static inline int namespace_equals(struct ceph_mount_options *fsopt,
const char *namespace, size_t len)
{
return !(fsopt->mds_namespace &&
(strlen(fsopt->mds_namespace) != len ||
strncmp(fsopt->mds_namespace, namespace, len)));
}
/* mount state */ /* mount state */
enum { enum {
CEPH_MOUNT_MOUNTING, CEPH_MOUNT_MOUNTING,
@ -639,7 +653,8 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
#define CEPH_I_FLUSH_SNAPS (1 << 8) /* need flush snapss */ #define CEPH_I_FLUSH_SNAPS (1 << 8) /* need flush snapss */
#define CEPH_I_ERROR_WRITE (1 << 9) /* have seen write errors */ #define CEPH_I_ERROR_WRITE (1 << 9) /* have seen write errors */
#define CEPH_I_ERROR_FILELOCK (1 << 10) /* have seen file lock errors */ #define CEPH_I_ERROR_FILELOCK (1 << 10) /* have seen file lock errors */
#define CEPH_I_ODIRECT (1 << 11) /* inode in direct I/O mode */ #define CEPH_I_ODIRECT_BIT (11) /* inode in direct I/O mode */
#define CEPH_I_ODIRECT (1 << CEPH_I_ODIRECT_BIT)
#define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */ #define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */
#define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT) #define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT)
#define CEPH_I_SHUTDOWN (1 << 13) /* inode is no longer usable */ #define CEPH_I_SHUTDOWN (1 << 13) /* inode is no longer usable */

View File

@ -2,6 +2,7 @@
#ifndef __FS_CEPH_MESSENGER_H #ifndef __FS_CEPH_MESSENGER_H
#define __FS_CEPH_MESSENGER_H #define __FS_CEPH_MESSENGER_H
#include <crypto/sha2.h>
#include <linux/bvec.h> #include <linux/bvec.h>
#include <linux/crypto.h> #include <linux/crypto.h>
#include <linux/kref.h> #include <linux/kref.h>
@ -412,7 +413,8 @@ struct ceph_connection_v2_info {
struct ceph_msg_data_cursor in_cursor; struct ceph_msg_data_cursor in_cursor;
struct ceph_msg_data_cursor out_cursor; struct ceph_msg_data_cursor out_cursor;
struct crypto_shash *hmac_tfm; /* post-auth signature */ struct hmac_sha256_key hmac_key; /* post-auth signature */
bool hmac_key_set;
struct crypto_aead *gcm_tfm; /* on-wire encryption */ struct crypto_aead *gcm_tfm; /* on-wire encryption */
struct aead_request *gcm_req; struct aead_request *gcm_req;
struct crypto_wait gcm_wait; struct crypto_wait gcm_wait;
@ -548,12 +550,12 @@ void ceph_addr_set_port(struct ceph_entity_addr *addr, int p);
void ceph_con_process_message(struct ceph_connection *con); void ceph_con_process_message(struct ceph_connection *con);
int ceph_con_in_msg_alloc(struct ceph_connection *con, int ceph_con_in_msg_alloc(struct ceph_connection *con,
struct ceph_msg_header *hdr, int *skip); struct ceph_msg_header *hdr, int *skip);
void ceph_con_get_out_msg(struct ceph_connection *con); struct ceph_msg *ceph_con_get_out_msg(struct ceph_connection *con);
/* messenger_v1.c */ /* messenger_v1.c */
int ceph_con_v1_try_read(struct ceph_connection *con); int ceph_con_v1_try_read(struct ceph_connection *con);
int ceph_con_v1_try_write(struct ceph_connection *con); int ceph_con_v1_try_write(struct ceph_connection *con);
void ceph_con_v1_revoke(struct ceph_connection *con); void ceph_con_v1_revoke(struct ceph_connection *con, struct ceph_msg *msg);
void ceph_con_v1_revoke_incoming(struct ceph_connection *con); void ceph_con_v1_revoke_incoming(struct ceph_connection *con);
bool ceph_con_v1_opened(struct ceph_connection *con); bool ceph_con_v1_opened(struct ceph_connection *con);
void ceph_con_v1_reset_session(struct ceph_connection *con); void ceph_con_v1_reset_session(struct ceph_connection *con);
@ -562,7 +564,7 @@ void ceph_con_v1_reset_protocol(struct ceph_connection *con);
/* messenger_v2.c */ /* messenger_v2.c */
int ceph_con_v2_try_read(struct ceph_connection *con); int ceph_con_v2_try_read(struct ceph_connection *con);
int ceph_con_v2_try_write(struct ceph_connection *con); int ceph_con_v2_try_write(struct ceph_connection *con);
void ceph_con_v2_revoke(struct ceph_connection *con); void ceph_con_v2_revoke(struct ceph_connection *con, struct ceph_msg *msg);
void ceph_con_v2_revoke_incoming(struct ceph_connection *con); void ceph_con_v2_revoke_incoming(struct ceph_connection *con);
bool ceph_con_v2_opened(struct ceph_connection *con); bool ceph_con_v2_opened(struct ceph_connection *con);
void ceph_con_v2_reset_session(struct ceph_connection *con); void ceph_con_v2_reset_session(struct ceph_connection *con);

View File

@ -6,8 +6,7 @@ config CEPH_LIB
select CRYPTO_AES select CRYPTO_AES
select CRYPTO_CBC select CRYPTO_CBC
select CRYPTO_GCM select CRYPTO_GCM
select CRYPTO_HMAC select CRYPTO_LIB_SHA256
select CRYPTO_SHA256
select CRYPTO select CRYPTO
select KEYS select KEYS
default n default n

View File

@ -1794,9 +1794,9 @@ void ceph_msg_revoke(struct ceph_msg *msg)
WARN_ON(con->state != CEPH_CON_S_OPEN); WARN_ON(con->state != CEPH_CON_S_OPEN);
dout("%s con %p msg %p was sending\n", __func__, con, msg); dout("%s con %p msg %p was sending\n", __func__, con, msg);
if (ceph_msgr2(from_msgr(con->msgr))) if (ceph_msgr2(from_msgr(con->msgr)))
ceph_con_v2_revoke(con); ceph_con_v2_revoke(con, msg);
else else
ceph_con_v1_revoke(con); ceph_con_v1_revoke(con, msg);
ceph_msg_put(con->out_msg); ceph_msg_put(con->out_msg);
con->out_msg = NULL; con->out_msg = NULL;
} else { } else {
@ -2111,11 +2111,13 @@ int ceph_con_in_msg_alloc(struct ceph_connection *con,
return ret; return ret;
} }
void ceph_con_get_out_msg(struct ceph_connection *con) struct ceph_msg *ceph_con_get_out_msg(struct ceph_connection *con)
{ {
struct ceph_msg *msg; struct ceph_msg *msg;
BUG_ON(list_empty(&con->out_queue)); if (list_empty(&con->out_queue))
return NULL;
msg = list_first_entry(&con->out_queue, struct ceph_msg, list_head); msg = list_first_entry(&con->out_queue, struct ceph_msg, list_head);
WARN_ON(msg->con != con); WARN_ON(msg->con != con);
@ -2142,7 +2144,7 @@ void ceph_con_get_out_msg(struct ceph_connection *con)
* message or in case of a fault. * message or in case of a fault.
*/ */
WARN_ON(con->out_msg); WARN_ON(con->out_msg);
con->out_msg = ceph_msg_get(msg); return con->out_msg = ceph_msg_get(msg);
} }
/* /*

View File

@ -169,10 +169,9 @@ static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
* Prepare footer for currently outgoing message, and finish things * Prepare footer for currently outgoing message, and finish things
* off. Assumes out_kvec* are already valid.. we just add on to the end. * off. Assumes out_kvec* are already valid.. we just add on to the end.
*/ */
static void prepare_write_message_footer(struct ceph_connection *con) static void prepare_write_message_footer(struct ceph_connection *con,
struct ceph_msg *m)
{ {
struct ceph_msg *m = con->out_msg;
m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
dout("prepare_write_message_footer %p\n", con); dout("prepare_write_message_footer %p\n", con);
@ -192,9 +191,9 @@ static void prepare_write_message_footer(struct ceph_connection *con)
/* /*
* Prepare headers for the next outgoing message. * Prepare headers for the next outgoing message.
*/ */
static void prepare_write_message(struct ceph_connection *con) static void prepare_write_message(struct ceph_connection *con,
struct ceph_msg *m)
{ {
struct ceph_msg *m;
u32 crc; u32 crc;
con_out_kvec_reset(con); con_out_kvec_reset(con);
@ -210,9 +209,6 @@ static void prepare_write_message(struct ceph_connection *con)
&con->v1.out_temp_ack); &con->v1.out_temp_ack);
} }
ceph_con_get_out_msg(con);
m = con->out_msg;
dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
m, con->out_seq, le16_to_cpu(m->hdr.type), m, con->out_seq, le16_to_cpu(m->hdr.type),
le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
@ -231,31 +227,31 @@ static void prepare_write_message(struct ceph_connection *con)
/* fill in hdr crc and finalize hdr */ /* fill in hdr crc and finalize hdr */
crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
con->out_msg->hdr.crc = cpu_to_le32(crc); m->hdr.crc = cpu_to_le32(crc);
memcpy(&con->v1.out_hdr, &con->out_msg->hdr, sizeof(con->v1.out_hdr)); memcpy(&con->v1.out_hdr, &m->hdr, sizeof(con->v1.out_hdr));
/* fill in front and middle crc, footer */ /* fill in front and middle crc, footer */
crc = crc32c(0, m->front.iov_base, m->front.iov_len); crc = crc32c(0, m->front.iov_base, m->front.iov_len);
con->out_msg->footer.front_crc = cpu_to_le32(crc); m->footer.front_crc = cpu_to_le32(crc);
if (m->middle) { if (m->middle) {
crc = crc32c(0, m->middle->vec.iov_base, crc = crc32c(0, m->middle->vec.iov_base,
m->middle->vec.iov_len); m->middle->vec.iov_len);
con->out_msg->footer.middle_crc = cpu_to_le32(crc); m->footer.middle_crc = cpu_to_le32(crc);
} else } else
con->out_msg->footer.middle_crc = 0; m->footer.middle_crc = 0;
dout("%s front_crc %u middle_crc %u\n", __func__, dout("%s front_crc %u middle_crc %u\n", __func__,
le32_to_cpu(con->out_msg->footer.front_crc), le32_to_cpu(m->footer.front_crc),
le32_to_cpu(con->out_msg->footer.middle_crc)); le32_to_cpu(m->footer.middle_crc));
con->out_msg->footer.flags = 0; m->footer.flags = 0;
/* is there a data payload? */ /* is there a data payload? */
con->out_msg->footer.data_crc = 0; m->footer.data_crc = 0;
if (m->data_length) { if (m->data_length) {
prepare_message_data(con->out_msg, m->data_length); prepare_message_data(m, m->data_length);
con->v1.out_more = 1; /* data + footer will follow */ con->v1.out_more = 1; /* data + footer will follow */
} else { } else {
/* no, queue up footer too and be done */ /* no, queue up footer too and be done */
prepare_write_message_footer(con); prepare_write_message_footer(con, m);
} }
ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
@ -462,9 +458,9 @@ out:
* 0 -> socket full, but more to do * 0 -> socket full, but more to do
* <0 -> error * <0 -> error
*/ */
static int write_partial_message_data(struct ceph_connection *con) static int write_partial_message_data(struct ceph_connection *con,
struct ceph_msg *msg)
{ {
struct ceph_msg *msg = con->out_msg;
struct ceph_msg_data_cursor *cursor = &msg->cursor; struct ceph_msg_data_cursor *cursor = &msg->cursor;
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
u32 crc; u32 crc;
@ -516,7 +512,7 @@ static int write_partial_message_data(struct ceph_connection *con)
else else
msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
con_out_kvec_reset(con); con_out_kvec_reset(con);
prepare_write_message_footer(con); prepare_write_message_footer(con, msg);
return 1; /* must return > 0 to indicate success */ return 1; /* must return > 0 to indicate success */
} }
@ -1472,6 +1468,7 @@ bad_tag:
*/ */
int ceph_con_v1_try_write(struct ceph_connection *con) int ceph_con_v1_try_write(struct ceph_connection *con)
{ {
struct ceph_msg *msg;
int ret = 1; int ret = 1;
dout("try_write start %p state %d\n", con, con->state); dout("try_write start %p state %d\n", con, con->state);
@ -1518,14 +1515,15 @@ more:
} }
/* msg pages? */ /* msg pages? */
if (con->out_msg) { msg = con->out_msg;
if (msg) {
if (con->v1.out_msg_done) { if (con->v1.out_msg_done) {
ceph_msg_put(con->out_msg); ceph_msg_put(msg);
con->out_msg = NULL; /* we're done with this one */ con->out_msg = NULL; /* we're done with this one */
goto do_next; goto do_next;
} }
ret = write_partial_message_data(con); ret = write_partial_message_data(con, msg);
if (ret == 1) if (ret == 1)
goto more; /* we need to send the footer, too! */ goto more; /* we need to send the footer, too! */
if (ret == 0) if (ret == 0)
@ -1545,8 +1543,8 @@ do_next:
goto more; goto more;
} }
/* is anything else pending? */ /* is anything else pending? */
if (!list_empty(&con->out_queue)) { if ((msg = ceph_con_get_out_msg(con)) != NULL) {
prepare_write_message(con); prepare_write_message(con, msg);
goto more; goto more;
} }
if (con->in_seq > con->in_seq_acked) { if (con->in_seq > con->in_seq_acked) {
@ -1564,10 +1562,8 @@ out:
return ret; return ret;
} }
void ceph_con_v1_revoke(struct ceph_connection *con) void ceph_con_v1_revoke(struct ceph_connection *con, struct ceph_msg *msg)
{ {
struct ceph_msg *msg = con->out_msg;
WARN_ON(con->v1.out_skip); WARN_ON(con->v1.out_skip);
/* footer */ /* footer */
if (con->v1.out_msg_done) { if (con->v1.out_msg_done) {

View File

@ -709,7 +709,7 @@ static int setup_crypto(struct ceph_connection *con,
dout("%s con %p con_mode %d session_key_len %d con_secret_len %d\n", dout("%s con %p con_mode %d session_key_len %d con_secret_len %d\n",
__func__, con, con->v2.con_mode, session_key_len, con_secret_len); __func__, con, con->v2.con_mode, session_key_len, con_secret_len);
WARN_ON(con->v2.hmac_tfm || con->v2.gcm_tfm || con->v2.gcm_req); WARN_ON(con->v2.hmac_key_set || con->v2.gcm_tfm || con->v2.gcm_req);
if (con->v2.con_mode != CEPH_CON_MODE_CRC && if (con->v2.con_mode != CEPH_CON_MODE_CRC &&
con->v2.con_mode != CEPH_CON_MODE_SECURE) { con->v2.con_mode != CEPH_CON_MODE_SECURE) {
@ -723,22 +723,8 @@ static int setup_crypto(struct ceph_connection *con,
return 0; /* auth_none */ return 0; /* auth_none */
} }
noio_flag = memalloc_noio_save(); hmac_sha256_preparekey(&con->v2.hmac_key, session_key, session_key_len);
con->v2.hmac_tfm = crypto_alloc_shash("hmac(sha256)", 0, 0); con->v2.hmac_key_set = true;
memalloc_noio_restore(noio_flag);
if (IS_ERR(con->v2.hmac_tfm)) {
ret = PTR_ERR(con->v2.hmac_tfm);
con->v2.hmac_tfm = NULL;
pr_err("failed to allocate hmac tfm context: %d\n", ret);
return ret;
}
ret = crypto_shash_setkey(con->v2.hmac_tfm, session_key,
session_key_len);
if (ret) {
pr_err("failed to set hmac key: %d\n", ret);
return ret;
}
if (con->v2.con_mode == CEPH_CON_MODE_CRC) { if (con->v2.con_mode == CEPH_CON_MODE_CRC) {
WARN_ON(con_secret_len); WARN_ON(con_secret_len);
@ -793,38 +779,26 @@ static int setup_crypto(struct ceph_connection *con,
return 0; /* auth_x, secure mode */ return 0; /* auth_x, secure mode */
} }
static int ceph_hmac_sha256(struct ceph_connection *con, static void ceph_hmac_sha256(struct ceph_connection *con,
const struct kvec *kvecs, int kvec_cnt, u8 *hmac) const struct kvec *kvecs, int kvec_cnt,
u8 hmac[SHA256_DIGEST_SIZE])
{ {
SHASH_DESC_ON_STACK(desc, con->v2.hmac_tfm); /* tfm arg is ignored */ struct hmac_sha256_ctx ctx;
int ret;
int i; int i;
dout("%s con %p hmac_tfm %p kvec_cnt %d\n", __func__, con, dout("%s con %p hmac_key_set %d kvec_cnt %d\n", __func__, con,
con->v2.hmac_tfm, kvec_cnt); con->v2.hmac_key_set, kvec_cnt);
if (!con->v2.hmac_tfm) { if (!con->v2.hmac_key_set) {
memset(hmac, 0, SHA256_DIGEST_SIZE); memset(hmac, 0, SHA256_DIGEST_SIZE);
return 0; /* auth_none */ return; /* auth_none */
} }
desc->tfm = con->v2.hmac_tfm; /* auth_x, both plain and secure modes */
ret = crypto_shash_init(desc); hmac_sha256_init(&ctx, &con->v2.hmac_key);
if (ret) for (i = 0; i < kvec_cnt; i++)
goto out; hmac_sha256_update(&ctx, kvecs[i].iov_base, kvecs[i].iov_len);
hmac_sha256_final(&ctx, hmac);
for (i = 0; i < kvec_cnt; i++) {
ret = crypto_shash_update(desc, kvecs[i].iov_base,
kvecs[i].iov_len);
if (ret)
goto out;
}
ret = crypto_shash_final(desc, hmac);
out:
shash_desc_zero(desc);
return ret; /* auth_x, both plain and secure modes */
} }
static void gcm_inc_nonce(struct ceph_gcm_nonce *nonce) static void gcm_inc_nonce(struct ceph_gcm_nonce *nonce)
@ -1455,17 +1429,14 @@ static int prepare_auth_request_more(struct ceph_connection *con,
static int prepare_auth_signature(struct ceph_connection *con) static int prepare_auth_signature(struct ceph_connection *con)
{ {
void *buf; void *buf;
int ret;
buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE, buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE,
con_secure(con))); con_secure(con)));
if (!buf) if (!buf)
return -ENOMEM; return -ENOMEM;
ret = ceph_hmac_sha256(con, con->v2.in_sign_kvecs, ceph_hmac_sha256(con, con->v2.in_sign_kvecs, con->v2.in_sign_kvec_cnt,
con->v2.in_sign_kvec_cnt, CTRL_BODY(buf)); CTRL_BODY(buf));
if (ret)
return ret;
return prepare_control(con, FRAME_TAG_AUTH_SIGNATURE, buf, return prepare_control(con, FRAME_TAG_AUTH_SIGNATURE, buf,
SHA256_DIGEST_SIZE); SHA256_DIGEST_SIZE);
@ -1589,10 +1560,11 @@ static int prepare_ack(struct ceph_connection *con)
return prepare_control(con, FRAME_TAG_ACK, con->v2.out_buf, 8); return prepare_control(con, FRAME_TAG_ACK, con->v2.out_buf, 8);
} }
static void prepare_epilogue_plain(struct ceph_connection *con, bool aborted) static void prepare_epilogue_plain(struct ceph_connection *con,
struct ceph_msg *msg, bool aborted)
{ {
dout("%s con %p msg %p aborted %d crcs %u %u %u\n", __func__, con, dout("%s con %p msg %p aborted %d crcs %u %u %u\n", __func__, con,
con->out_msg, aborted, con->v2.out_epil.front_crc, msg, aborted, con->v2.out_epil.front_crc,
con->v2.out_epil.middle_crc, con->v2.out_epil.data_crc); con->v2.out_epil.middle_crc, con->v2.out_epil.data_crc);
encode_epilogue_plain(con, aborted); encode_epilogue_plain(con, aborted);
@ -1603,10 +1575,9 @@ static void prepare_epilogue_plain(struct ceph_connection *con, bool aborted)
* For "used" empty segments, crc is -1. For unused (trailing) * For "used" empty segments, crc is -1. For unused (trailing)
* segments, crc is 0. * segments, crc is 0.
*/ */
static void prepare_message_plain(struct ceph_connection *con) static void prepare_message_plain(struct ceph_connection *con,
struct ceph_msg *msg)
{ {
struct ceph_msg *msg = con->out_msg;
prepare_head_plain(con, con->v2.out_buf, prepare_head_plain(con, con->v2.out_buf,
sizeof(struct ceph_msg_header2), NULL, 0, false); sizeof(struct ceph_msg_header2), NULL, 0, false);
@ -1647,7 +1618,7 @@ static void prepare_message_plain(struct ceph_connection *con)
con->v2.out_state = OUT_S_QUEUE_DATA; con->v2.out_state = OUT_S_QUEUE_DATA;
} else { } else {
con->v2.out_epil.data_crc = 0; con->v2.out_epil.data_crc = 0;
prepare_epilogue_plain(con, false); prepare_epilogue_plain(con, msg, false);
con->v2.out_state = OUT_S_FINISH_MESSAGE; con->v2.out_state = OUT_S_FINISH_MESSAGE;
} }
} }
@ -1659,7 +1630,8 @@ static void prepare_message_plain(struct ceph_connection *con)
* allocate pages for the entire tail of the message (currently up * allocate pages for the entire tail of the message (currently up
* to ~32M) and two sgs arrays (up to ~256K each)... * to ~32M) and two sgs arrays (up to ~256K each)...
*/ */
static int prepare_message_secure(struct ceph_connection *con) static int prepare_message_secure(struct ceph_connection *con,
struct ceph_msg *msg)
{ {
void *zerop = page_address(ceph_zero_page); void *zerop = page_address(ceph_zero_page);
struct sg_table enc_sgt = {}; struct sg_table enc_sgt = {};
@ -1674,7 +1646,7 @@ static int prepare_message_secure(struct ceph_connection *con)
if (ret) if (ret)
return ret; return ret;
tail_len = tail_onwire_len(con->out_msg, true); tail_len = tail_onwire_len(msg, true);
if (!tail_len) { if (!tail_len) {
/* /*
* Empty message: once the head is written, * Empty message: once the head is written,
@ -1685,7 +1657,7 @@ static int prepare_message_secure(struct ceph_connection *con)
} }
encode_epilogue_secure(con, false); encode_epilogue_secure(con, false);
ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop, ret = setup_message_sgs(&sgt, msg, zerop, zerop, zerop,
&con->v2.out_epil, NULL, 0, false); &con->v2.out_epil, NULL, 0, false);
if (ret) if (ret)
goto out; goto out;
@ -1714,7 +1686,7 @@ static int prepare_message_secure(struct ceph_connection *con)
goto out; goto out;
dout("%s con %p msg %p sg_cnt %d enc_page_cnt %d\n", __func__, con, dout("%s con %p msg %p sg_cnt %d enc_page_cnt %d\n", __func__, con,
con->out_msg, sgt.orig_nents, enc_page_cnt); msg, sgt.orig_nents, enc_page_cnt);
con->v2.out_state = OUT_S_QUEUE_ENC_PAGE; con->v2.out_state = OUT_S_QUEUE_ENC_PAGE;
out: out:
@ -1723,19 +1695,19 @@ out:
return ret; return ret;
} }
static int prepare_message(struct ceph_connection *con) static int prepare_message(struct ceph_connection *con, struct ceph_msg *msg)
{ {
int lens[] = { int lens[] = {
sizeof(struct ceph_msg_header2), sizeof(struct ceph_msg_header2),
front_len(con->out_msg), front_len(msg),
middle_len(con->out_msg), middle_len(msg),
data_len(con->out_msg) data_len(msg)
}; };
struct ceph_frame_desc desc; struct ceph_frame_desc desc;
int ret; int ret;
dout("%s con %p msg %p logical %d+%d+%d+%d\n", __func__, con, dout("%s con %p msg %p logical %d+%d+%d+%d\n", __func__, con,
con->out_msg, lens[0], lens[1], lens[2], lens[3]); msg, lens[0], lens[1], lens[2], lens[3]);
if (con->in_seq > con->in_seq_acked) { if (con->in_seq > con->in_seq_acked) {
dout("%s con %p in_seq_acked %llu -> %llu\n", __func__, con, dout("%s con %p in_seq_acked %llu -> %llu\n", __func__, con,
@ -1746,15 +1718,15 @@ static int prepare_message(struct ceph_connection *con)
reset_out_kvecs(con); reset_out_kvecs(con);
init_frame_desc(&desc, FRAME_TAG_MESSAGE, lens, 4); init_frame_desc(&desc, FRAME_TAG_MESSAGE, lens, 4);
encode_preamble(&desc, con->v2.out_buf); encode_preamble(&desc, con->v2.out_buf);
fill_header2(CTRL_BODY(con->v2.out_buf), &con->out_msg->hdr, fill_header2(CTRL_BODY(con->v2.out_buf), &msg->hdr,
con->in_seq_acked); con->in_seq_acked);
if (con_secure(con)) { if (con_secure(con)) {
ret = prepare_message_secure(con); ret = prepare_message_secure(con, msg);
if (ret) if (ret)
return ret; return ret;
} else { } else {
prepare_message_plain(con); prepare_message_plain(con, msg);
} }
ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
@ -2460,10 +2432,8 @@ static int process_auth_signature(struct ceph_connection *con,
return -EINVAL; return -EINVAL;
} }
ret = ceph_hmac_sha256(con, con->v2.out_sign_kvecs, ceph_hmac_sha256(con, con->v2.out_sign_kvecs, con->v2.out_sign_kvec_cnt,
con->v2.out_sign_kvec_cnt, hmac); hmac);
if (ret)
return ret;
ceph_decode_need(&p, end, SHA256_DIGEST_SIZE, bad); ceph_decode_need(&p, end, SHA256_DIGEST_SIZE, bad);
if (crypto_memneq(p, hmac, SHA256_DIGEST_SIZE)) { if (crypto_memneq(p, hmac, SHA256_DIGEST_SIZE)) {
@ -3184,20 +3154,20 @@ int ceph_con_v2_try_read(struct ceph_connection *con)
} }
} }
static void queue_data(struct ceph_connection *con) static void queue_data(struct ceph_connection *con, struct ceph_msg *msg)
{ {
struct bio_vec bv; struct bio_vec bv;
con->v2.out_epil.data_crc = -1; con->v2.out_epil.data_crc = -1;
ceph_msg_data_cursor_init(&con->v2.out_cursor, con->out_msg, ceph_msg_data_cursor_init(&con->v2.out_cursor, msg,
data_len(con->out_msg)); data_len(msg));
get_bvec_at(&con->v2.out_cursor, &bv); get_bvec_at(&con->v2.out_cursor, &bv);
set_out_bvec(con, &bv, true); set_out_bvec(con, &bv, true);
con->v2.out_state = OUT_S_QUEUE_DATA_CONT; con->v2.out_state = OUT_S_QUEUE_DATA_CONT;
} }
static void queue_data_cont(struct ceph_connection *con) static void queue_data_cont(struct ceph_connection *con, struct ceph_msg *msg)
{ {
struct bio_vec bv; struct bio_vec bv;
@ -3218,7 +3188,7 @@ static void queue_data_cont(struct ceph_connection *con)
* we are done. * we are done.
*/ */
reset_out_kvecs(con); reset_out_kvecs(con);
prepare_epilogue_plain(con, false); prepare_epilogue_plain(con, msg, false);
con->v2.out_state = OUT_S_FINISH_MESSAGE; con->v2.out_state = OUT_S_FINISH_MESSAGE;
} }
@ -3250,7 +3220,7 @@ static void queue_enc_page(struct ceph_connection *con)
con->v2.out_state = OUT_S_FINISH_MESSAGE; con->v2.out_state = OUT_S_FINISH_MESSAGE;
} }
static void queue_zeros(struct ceph_connection *con) static void queue_zeros(struct ceph_connection *con, struct ceph_msg *msg)
{ {
dout("%s con %p out_zero %d\n", __func__, con, con->v2.out_zero); dout("%s con %p out_zero %d\n", __func__, con, con->v2.out_zero);
@ -3267,7 +3237,7 @@ static void queue_zeros(struct ceph_connection *con)
* Once it's written, we are done patching up for the revoke. * Once it's written, we are done patching up for the revoke.
*/ */
reset_out_kvecs(con); reset_out_kvecs(con);
prepare_epilogue_plain(con, true); prepare_epilogue_plain(con, msg, true);
con->v2.out_state = OUT_S_FINISH_MESSAGE; con->v2.out_state = OUT_S_FINISH_MESSAGE;
} }
@ -3294,6 +3264,7 @@ static void finish_message(struct ceph_connection *con)
static int populate_out_iter(struct ceph_connection *con) static int populate_out_iter(struct ceph_connection *con)
{ {
struct ceph_msg *msg;
int ret; int ret;
dout("%s con %p state %d out_state %d\n", __func__, con, con->state, dout("%s con %p state %d out_state %d\n", __func__, con, con->state,
@ -3309,18 +3280,18 @@ static int populate_out_iter(struct ceph_connection *con)
switch (con->v2.out_state) { switch (con->v2.out_state) {
case OUT_S_QUEUE_DATA: case OUT_S_QUEUE_DATA:
WARN_ON(!con->out_msg); WARN_ON(!con->out_msg);
queue_data(con); queue_data(con, con->out_msg);
goto populated; goto populated;
case OUT_S_QUEUE_DATA_CONT: case OUT_S_QUEUE_DATA_CONT:
WARN_ON(!con->out_msg); WARN_ON(!con->out_msg);
queue_data_cont(con); queue_data_cont(con, con->out_msg);
goto populated; goto populated;
case OUT_S_QUEUE_ENC_PAGE: case OUT_S_QUEUE_ENC_PAGE:
queue_enc_page(con); queue_enc_page(con);
goto populated; goto populated;
case OUT_S_QUEUE_ZEROS: case OUT_S_QUEUE_ZEROS:
WARN_ON(con->out_msg); /* revoked */ WARN_ON(con->out_msg); /* revoked */
queue_zeros(con); queue_zeros(con, con->out_msg);
goto populated; goto populated;
case OUT_S_FINISH_MESSAGE: case OUT_S_FINISH_MESSAGE:
finish_message(con); finish_message(con);
@ -3339,9 +3310,8 @@ static int populate_out_iter(struct ceph_connection *con)
pr_err("prepare_keepalive2 failed: %d\n", ret); pr_err("prepare_keepalive2 failed: %d\n", ret);
return ret; return ret;
} }
} else if (!list_empty(&con->out_queue)) { } else if ((msg = ceph_con_get_out_msg(con)) != NULL) {
ceph_con_get_out_msg(con); ret = prepare_message(con, msg);
ret = prepare_message(con);
if (ret) { if (ret) {
pr_err("prepare_message failed: %d\n", ret); pr_err("prepare_message failed: %d\n", ret);
return ret; return ret;
@ -3453,17 +3423,18 @@ static u32 crc32c_zeros(u32 crc, int zero_len)
return crc; return crc;
} }
static void prepare_zero_front(struct ceph_connection *con, int resid) static void prepare_zero_front(struct ceph_connection *con,
struct ceph_msg *msg, int resid)
{ {
int sent; int sent;
WARN_ON(!resid || resid > front_len(con->out_msg)); WARN_ON(!resid || resid > front_len(msg));
sent = front_len(con->out_msg) - resid; sent = front_len(msg) - resid;
dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid); dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid);
if (sent) { if (sent) {
con->v2.out_epil.front_crc = con->v2.out_epil.front_crc =
crc32c(-1, con->out_msg->front.iov_base, sent); crc32c(-1, msg->front.iov_base, sent);
con->v2.out_epil.front_crc = con->v2.out_epil.front_crc =
crc32c_zeros(con->v2.out_epil.front_crc, resid); crc32c_zeros(con->v2.out_epil.front_crc, resid);
} else { } else {
@ -3474,17 +3445,18 @@ static void prepare_zero_front(struct ceph_connection *con, int resid)
out_zero_add(con, resid); out_zero_add(con, resid);
} }
static void prepare_zero_middle(struct ceph_connection *con, int resid) static void prepare_zero_middle(struct ceph_connection *con,
struct ceph_msg *msg, int resid)
{ {
int sent; int sent;
WARN_ON(!resid || resid > middle_len(con->out_msg)); WARN_ON(!resid || resid > middle_len(msg));
sent = middle_len(con->out_msg) - resid; sent = middle_len(msg) - resid;
dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid); dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid);
if (sent) { if (sent) {
con->v2.out_epil.middle_crc = con->v2.out_epil.middle_crc =
crc32c(-1, con->out_msg->middle->vec.iov_base, sent); crc32c(-1, msg->middle->vec.iov_base, sent);
con->v2.out_epil.middle_crc = con->v2.out_epil.middle_crc =
crc32c_zeros(con->v2.out_epil.middle_crc, resid); crc32c_zeros(con->v2.out_epil.middle_crc, resid);
} else { } else {
@ -3495,61 +3467,64 @@ static void prepare_zero_middle(struct ceph_connection *con, int resid)
out_zero_add(con, resid); out_zero_add(con, resid);
} }
static void prepare_zero_data(struct ceph_connection *con) static void prepare_zero_data(struct ceph_connection *con,
struct ceph_msg *msg)
{ {
dout("%s con %p\n", __func__, con); dout("%s con %p\n", __func__, con);
con->v2.out_epil.data_crc = crc32c_zeros(-1, data_len(con->out_msg)); con->v2.out_epil.data_crc = crc32c_zeros(-1, data_len(msg));
out_zero_add(con, data_len(con->out_msg)); out_zero_add(con, data_len(msg));
} }
static void revoke_at_queue_data(struct ceph_connection *con) static void revoke_at_queue_data(struct ceph_connection *con,
struct ceph_msg *msg)
{ {
int boundary; int boundary;
int resid; int resid;
WARN_ON(!data_len(con->out_msg)); WARN_ON(!data_len(msg));
WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter)); WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
resid = iov_iter_count(&con->v2.out_iter); resid = iov_iter_count(&con->v2.out_iter);
boundary = front_len(con->out_msg) + middle_len(con->out_msg); boundary = front_len(msg) + middle_len(msg);
if (resid > boundary) { if (resid > boundary) {
resid -= boundary; resid -= boundary;
WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN); WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN);
dout("%s con %p was sending head\n", __func__, con); dout("%s con %p was sending head\n", __func__, con);
if (front_len(con->out_msg)) if (front_len(msg))
prepare_zero_front(con, front_len(con->out_msg)); prepare_zero_front(con, msg, front_len(msg));
if (middle_len(con->out_msg)) if (middle_len(msg))
prepare_zero_middle(con, middle_len(con->out_msg)); prepare_zero_middle(con, msg, middle_len(msg));
prepare_zero_data(con); prepare_zero_data(con, msg);
WARN_ON(iov_iter_count(&con->v2.out_iter) != resid); WARN_ON(iov_iter_count(&con->v2.out_iter) != resid);
con->v2.out_state = OUT_S_QUEUE_ZEROS; con->v2.out_state = OUT_S_QUEUE_ZEROS;
return; return;
} }
boundary = middle_len(con->out_msg); boundary = middle_len(msg);
if (resid > boundary) { if (resid > boundary) {
resid -= boundary; resid -= boundary;
dout("%s con %p was sending front\n", __func__, con); dout("%s con %p was sending front\n", __func__, con);
prepare_zero_front(con, resid); prepare_zero_front(con, msg, resid);
if (middle_len(con->out_msg)) if (middle_len(msg))
prepare_zero_middle(con, middle_len(con->out_msg)); prepare_zero_middle(con, msg, middle_len(msg));
prepare_zero_data(con); prepare_zero_data(con, msg);
queue_zeros(con); queue_zeros(con, msg);
return; return;
} }
WARN_ON(!resid); WARN_ON(!resid);
dout("%s con %p was sending middle\n", __func__, con); dout("%s con %p was sending middle\n", __func__, con);
prepare_zero_middle(con, resid); prepare_zero_middle(con, msg, resid);
prepare_zero_data(con); prepare_zero_data(con, msg);
queue_zeros(con); queue_zeros(con, msg);
} }
static void revoke_at_queue_data_cont(struct ceph_connection *con) static void revoke_at_queue_data_cont(struct ceph_connection *con,
struct ceph_msg *msg)
{ {
int sent, resid; /* current piece of data */ int sent, resid; /* current piece of data */
WARN_ON(!data_len(con->out_msg)); WARN_ON(!data_len(msg));
WARN_ON(!iov_iter_is_bvec(&con->v2.out_iter)); WARN_ON(!iov_iter_is_bvec(&con->v2.out_iter));
resid = iov_iter_count(&con->v2.out_iter); resid = iov_iter_count(&con->v2.out_iter);
WARN_ON(!resid || resid > con->v2.out_bvec.bv_len); WARN_ON(!resid || resid > con->v2.out_bvec.bv_len);
@ -3568,10 +3543,11 @@ static void revoke_at_queue_data_cont(struct ceph_connection *con)
con->v2.out_iter.count -= resid; con->v2.out_iter.count -= resid;
out_zero_add(con, con->v2.out_cursor.total_resid); out_zero_add(con, con->v2.out_cursor.total_resid);
queue_zeros(con); queue_zeros(con, msg);
} }
static void revoke_at_finish_message(struct ceph_connection *con) static void revoke_at_finish_message(struct ceph_connection *con,
struct ceph_msg *msg)
{ {
int boundary; int boundary;
int resid; int resid;
@ -3579,39 +3555,39 @@ static void revoke_at_finish_message(struct ceph_connection *con)
WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter)); WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
resid = iov_iter_count(&con->v2.out_iter); resid = iov_iter_count(&con->v2.out_iter);
if (!front_len(con->out_msg) && !middle_len(con->out_msg) && if (!front_len(msg) && !middle_len(msg) &&
!data_len(con->out_msg)) { !data_len(msg)) {
WARN_ON(!resid || resid > MESSAGE_HEAD_PLAIN_LEN); WARN_ON(!resid || resid > MESSAGE_HEAD_PLAIN_LEN);
dout("%s con %p was sending head (empty message) - noop\n", dout("%s con %p was sending head (empty message) - noop\n",
__func__, con); __func__, con);
return; return;
} }
boundary = front_len(con->out_msg) + middle_len(con->out_msg) + boundary = front_len(msg) + middle_len(msg) +
CEPH_EPILOGUE_PLAIN_LEN; CEPH_EPILOGUE_PLAIN_LEN;
if (resid > boundary) { if (resid > boundary) {
resid -= boundary; resid -= boundary;
WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN); WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN);
dout("%s con %p was sending head\n", __func__, con); dout("%s con %p was sending head\n", __func__, con);
if (front_len(con->out_msg)) if (front_len(msg))
prepare_zero_front(con, front_len(con->out_msg)); prepare_zero_front(con, msg, front_len(msg));
if (middle_len(con->out_msg)) if (middle_len(msg))
prepare_zero_middle(con, middle_len(con->out_msg)); prepare_zero_middle(con, msg, middle_len(msg));
con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN; con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
WARN_ON(iov_iter_count(&con->v2.out_iter) != resid); WARN_ON(iov_iter_count(&con->v2.out_iter) != resid);
con->v2.out_state = OUT_S_QUEUE_ZEROS; con->v2.out_state = OUT_S_QUEUE_ZEROS;
return; return;
} }
boundary = middle_len(con->out_msg) + CEPH_EPILOGUE_PLAIN_LEN; boundary = middle_len(msg) + CEPH_EPILOGUE_PLAIN_LEN;
if (resid > boundary) { if (resid > boundary) {
resid -= boundary; resid -= boundary;
dout("%s con %p was sending front\n", __func__, con); dout("%s con %p was sending front\n", __func__, con);
prepare_zero_front(con, resid); prepare_zero_front(con, msg, resid);
if (middle_len(con->out_msg)) if (middle_len(msg))
prepare_zero_middle(con, middle_len(con->out_msg)); prepare_zero_middle(con, msg, middle_len(msg));
con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN; con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
queue_zeros(con); queue_zeros(con, msg);
return; return;
} }
@ -3619,9 +3595,9 @@ static void revoke_at_finish_message(struct ceph_connection *con)
if (resid > boundary) { if (resid > boundary) {
resid -= boundary; resid -= boundary;
dout("%s con %p was sending middle\n", __func__, con); dout("%s con %p was sending middle\n", __func__, con);
prepare_zero_middle(con, resid); prepare_zero_middle(con, msg, resid);
con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN; con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN;
queue_zeros(con); queue_zeros(con, msg);
return; return;
} }
@ -3629,7 +3605,7 @@ static void revoke_at_finish_message(struct ceph_connection *con)
dout("%s con %p was sending epilogue - noop\n", __func__, con); dout("%s con %p was sending epilogue - noop\n", __func__, con);
} }
void ceph_con_v2_revoke(struct ceph_connection *con) void ceph_con_v2_revoke(struct ceph_connection *con, struct ceph_msg *msg)
{ {
WARN_ON(con->v2.out_zero); WARN_ON(con->v2.out_zero);
@ -3642,13 +3618,13 @@ void ceph_con_v2_revoke(struct ceph_connection *con)
switch (con->v2.out_state) { switch (con->v2.out_state) {
case OUT_S_QUEUE_DATA: case OUT_S_QUEUE_DATA:
revoke_at_queue_data(con); revoke_at_queue_data(con, msg);
break; break;
case OUT_S_QUEUE_DATA_CONT: case OUT_S_QUEUE_DATA_CONT:
revoke_at_queue_data_cont(con); revoke_at_queue_data_cont(con, msg);
break; break;
case OUT_S_FINISH_MESSAGE: case OUT_S_FINISH_MESSAGE:
revoke_at_finish_message(con); revoke_at_finish_message(con, msg);
break; break;
default: default:
WARN(1, "bad out_state %d", con->v2.out_state); WARN(1, "bad out_state %d", con->v2.out_state);
@ -3814,10 +3790,8 @@ void ceph_con_v2_reset_protocol(struct ceph_connection *con)
memzero_explicit(&con->v2.in_gcm_nonce, CEPH_GCM_IV_LEN); memzero_explicit(&con->v2.in_gcm_nonce, CEPH_GCM_IV_LEN);
memzero_explicit(&con->v2.out_gcm_nonce, CEPH_GCM_IV_LEN); memzero_explicit(&con->v2.out_gcm_nonce, CEPH_GCM_IV_LEN);
if (con->v2.hmac_tfm) { memzero_explicit(&con->v2.hmac_key, sizeof(con->v2.hmac_key));
crypto_free_shash(con->v2.hmac_tfm); con->v2.hmac_key_set = false;
con->v2.hmac_tfm = NULL;
}
if (con->v2.gcm_req) { if (con->v2.gcm_req) {
aead_request_free(con->v2.gcm_req); aead_request_free(con->v2.gcm_req);
con->v2.gcm_req = NULL; con->v2.gcm_req = NULL;