tls: Preserve sk_err across recvmsg() when data has been copied
The sk_err check in tls_rx_rec_wait() consumes the error via
sock_error(), which clears sk_err atomically. When the caller
(tls_sw_recvmsg, tls_sw_splice_read, or tls_sw_read_sock) already
has bytes copied to userspace, it returns those bytes and discards
the error from this call. sk_err is now zero on the socket, so the
next read syscall observes only RCV_SHUTDOWN and reports a clean
EOF instead of the actual error (typically -ECONNRESET).
The race is reachable when tls_read_flush_backlog()'s periodic
sk_flush_backlog() triggers tcp_reset() in the middle of a
multi-record read.
Pass a has_copied flag to tls_rx_rec_wait(). When has_copied is
false, consume sk_err via sock_error() as before. When has_copied
is true, report the error read via READ_ONCE(sk->sk_err) but leave sk_err set:
the caller returns the byte count and discards the err from this
call, and the next read syscall surfaces the preserved sk_err. This
mirrors the tcp_recvmsg() preserve-and-surface pattern.
The decrypt-abort path is unaffected: tls_err_abort() raises
sk_err to EBADMSG after tls_rx_rec_wait() returns, and nothing
on the caller's return path consumes it, so the EBADMSG surfaces
on the next read.
tls_sw_splice_read() passes has_copied=false: it processes
one record per call, so no bytes have been copied within the
function when tls_rx_rec_wait() runs. A reset that arrives
between iterations of splice_direct_to_actor() (the sendfile()
path) is still consumed by sock_error() in the later call, and the
outer loop returns the prior iterations' byte count and drops the
error. tcp_splice_read() exhibits the same pattern at the iteration
boundary; addressing it belongs at the splice_direct_to_actor()
layer and is out of scope here.
Fixes: c46b01839f ("tls: rx: periodically flush socket backlog")
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Link: https://patch.msgid.link/20260513125825.205189-1-cel@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
branch: master
parent: e8fb3de2a8
commit: f508262ae9
|
|
@@ -1366,9 +1366,14 @@ unlock:
|
||||||
mutex_unlock(&tls_ctx->tx_lock);
|
mutex_unlock(&tls_ctx->tx_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* When has_copied is true the caller has already moved bytes to
|
||||||
|
* userspace. Report sk_err but leave it set so the next read
|
||||||
|
* surfaces it instead of a spurious EOF, otherwise sk_err is
|
||||||
|
* consumed via sock_error().
|
||||||
|
*/
|
||||||
static int
|
static int
|
||||||
tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
|
tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
|
||||||
bool released)
|
bool released, bool has_copied)
|
||||||
{
|
{
|
||||||
struct tls_context *tls_ctx = tls_get_ctx(sk);
|
struct tls_context *tls_ctx = tls_get_ctx(sk);
|
||||||
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
|
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
|
||||||
|
|
@@ -1386,8 +1391,11 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
|
||||||
if (!sk_psock_queue_empty(psock))
|
if (!sk_psock_queue_empty(psock))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (sk->sk_err)
|
if (sk->sk_err) {
|
||||||
|
if (has_copied)
|
||||||
|
return -READ_ONCE(sk->sk_err);
|
||||||
return sock_error(sk);
|
return sock_error(sk);
|
||||||
|
}
|
||||||
|
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
@@ -1423,7 +1431,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unlikely(!tls_strp_msg_load(&ctx->strp, released)))
|
if (unlikely(!tls_strp_msg_load(&ctx->strp, released)))
|
||||||
return tls_rx_rec_wait(sk, psock, nonblock, false);
|
return tls_rx_rec_wait(sk, psock, nonblock, false, has_copied);
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
@@ -2110,7 +2118,7 @@ int tls_sw_recvmsg(struct sock *sk,
|
||||||
int to_decrypt, chunk;
|
int to_decrypt, chunk;
|
||||||
|
|
||||||
err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT,
|
err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT,
|
||||||
released);
|
released, !!(decrypted + copied));
|
||||||
if (err <= 0) {
|
if (err <= 0) {
|
||||||
if (psock) {
|
if (psock) {
|
||||||
chunk = sk_msg_recvmsg(sk, psock, msg, len,
|
chunk = sk_msg_recvmsg(sk, psock, msg, len,
|
||||||
|
|
@@ -2297,7 +2305,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
|
||||||
struct tls_decrypt_arg darg;
|
struct tls_decrypt_arg darg;
|
||||||
|
|
||||||
err = tls_rx_rec_wait(sk, NULL, flags & SPLICE_F_NONBLOCK,
|
err = tls_rx_rec_wait(sk, NULL, flags & SPLICE_F_NONBLOCK,
|
||||||
true);
|
true, false);
|
||||||
if (err <= 0)
|
if (err <= 0)
|
||||||
goto splice_read_end;
|
goto splice_read_end;
|
||||||
|
|
||||||
|
|
@@ -2383,7 +2391,7 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
|
||||||
} else {
|
} else {
|
||||||
struct tls_decrypt_arg darg;
|
struct tls_decrypt_arg darg;
|
||||||
|
|
||||||
err = tls_rx_rec_wait(sk, NULL, true, released);
|
err = tls_rx_rec_wait(sk, NULL, true, released, !!copied);
|
||||||
if (err <= 0)
|
if (err <= 0)
|
||||||
goto read_sock_end;
|
goto read_sock_end;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue