195 lines
5.6 KiB
C
195 lines
5.6 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* Copyright (c) 2025 Oracle. All Rights Reserved.
|
|
* Author: Darrick J. Wong <djwong@kernel.org>
|
|
*/
|
|
#include <linux/fs.h>
|
|
#include <linux/fsnotify.h>
|
|
#include <linux/mempool.h>
|
|
#include <linux/fserror.h>
|
|
|
|
#define FSERROR_DEFAULT_EVENT_POOL_SIZE (32)
|
|
|
|
static struct mempool fserror_events_pool;
|
|
|
|
void fserror_mount(struct super_block *sb)
|
|
{
|
|
/*
|
|
* The pending error counter is biased by 1 so that we don't wake_var
|
|
* until we're actually trying to unmount.
|
|
*/
|
|
refcount_set(&sb->s_pending_errors, 1);
|
|
}
|
|
|
|
void fserror_unmount(struct super_block *sb)
|
|
{
|
|
/*
|
|
* If we don't drop the pending error count to zero, then wait for it
|
|
* to drop below 1, which means that the pending errors cleared and
|
|
* hopefully we didn't saturate with 1 billion+ concurrent events.
|
|
*/
|
|
if (!refcount_dec_and_test(&sb->s_pending_errors))
|
|
wait_var_event(&sb->s_pending_errors,
|
|
refcount_read(&sb->s_pending_errors) < 1);
|
|
}
|
|
|
|
static inline void fserror_pending_dec(struct super_block *sb)
|
|
{
|
|
if (refcount_dec_and_test(&sb->s_pending_errors))
|
|
wake_up_var(&sb->s_pending_errors);
|
|
}
|
|
|
|
static inline void fserror_free_event(struct fserror_event *event)
|
|
{
|
|
fserror_pending_dec(event->sb);
|
|
mempool_free(event, &fserror_events_pool);
|
|
}
|
|
|
|
static void fserror_worker(struct work_struct *work)
|
|
{
|
|
struct fserror_event *event =
|
|
container_of(work, struct fserror_event, work);
|
|
struct super_block *sb = event->sb;
|
|
|
|
if (sb->s_flags & SB_ACTIVE) {
|
|
struct fs_error_report report = {
|
|
/* send positive error number to userspace */
|
|
.error = -event->error,
|
|
.inode = event->inode,
|
|
.sb = event->sb,
|
|
};
|
|
|
|
if (sb->s_op->report_error)
|
|
sb->s_op->report_error(event);
|
|
|
|
fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, NULL, NULL,
|
|
NULL, 0);
|
|
}
|
|
|
|
iput(event->inode);
|
|
fserror_free_event(event);
|
|
}
|
|
|
|
static inline struct fserror_event *fserror_alloc_event(struct super_block *sb,
|
|
gfp_t gfp_flags)
|
|
{
|
|
struct fserror_event *event = NULL;
|
|
|
|
/*
|
|
* If pending_errors already reached zero or is no longer active,
|
|
* the superblock is being deactivated so there's no point in
|
|
* continuing.
|
|
*
|
|
* The order of the check of s_pending_errors and SB_ACTIVE are
|
|
* mandated by order of accesses in generic_shutdown_super and
|
|
* fserror_unmount. Barriers are implicitly provided by the refcount
|
|
* manipulations in this function and fserror_unmount.
|
|
*/
|
|
if (!refcount_inc_not_zero(&sb->s_pending_errors))
|
|
return NULL;
|
|
if (!(sb->s_flags & SB_ACTIVE))
|
|
goto out_pending;
|
|
|
|
event = mempool_alloc(&fserror_events_pool, gfp_flags);
|
|
if (!event)
|
|
goto out_pending;
|
|
|
|
/* mempool_alloc doesn't support GFP_ZERO */
|
|
memset(event, 0, sizeof(*event));
|
|
event->sb = sb;
|
|
INIT_WORK(&event->work, fserror_worker);
|
|
|
|
return event;
|
|
|
|
out_pending:
|
|
fserror_pending_dec(sb);
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* fserror_report - report a filesystem error of some kind
|
|
*
|
|
* @sb: superblock of the filesystem
|
|
* @inode: inode within that filesystem, if applicable
|
|
* @type: type of error encountered
|
|
* @pos: start of inode range affected, if applicable
|
|
* @len: length of inode range affected, if applicable
|
|
* @error: error number encountered, must be negative
|
|
* @gfp: memory allocation flags for conveying the event to a worker,
|
|
* since this function can be called from atomic contexts
|
|
*
|
|
* Report details of a filesystem error to the super_operations::report_error
|
|
* callback if present; and to fsnotify for distribution to userspace. @sb,
|
|
* @gfp, @type, and @error must all be specified. For file I/O errors, the
|
|
* @inode, @pos, and @len fields must also be specified. For file metadata
|
|
* errors, @inode must be specified. If @inode is not NULL, then @inode->i_sb
|
|
* must point to @sb.
|
|
*
|
|
* Reporting work is deferred to a workqueue to ensure that ->report_error is
|
|
* called from process context without any locks held. An active reference to
|
|
* the inode is maintained until event handling is complete, and unmount will
|
|
* wait for queued events to drain.
|
|
*/
|
|
void fserror_report(struct super_block *sb, struct inode *inode,
|
|
enum fserror_type type, loff_t pos, u64 len, int error,
|
|
gfp_t gfp)
|
|
{
|
|
struct fserror_event *event;
|
|
|
|
/* sb and inode must be from the same filesystem */
|
|
WARN_ON_ONCE(inode && inode->i_sb != sb);
|
|
|
|
/* error number must be negative */
|
|
WARN_ON_ONCE(error >= 0);
|
|
|
|
event = fserror_alloc_event(sb, gfp);
|
|
if (!event)
|
|
goto lost;
|
|
|
|
event->type = type;
|
|
event->pos = pos;
|
|
event->len = len;
|
|
event->error = error;
|
|
|
|
/*
|
|
* Can't iput from non-sleeping context, so grabbing another reference
|
|
* to the inode must be the last thing before submitting the event.
|
|
*/
|
|
if (inode) {
|
|
event->inode = igrab(inode);
|
|
if (!event->inode)
|
|
goto lost_event;
|
|
}
|
|
|
|
/*
|
|
* Use schedule_work here even if we're already in process context so
|
|
* that fsnotify and super_operations::report_error implementations are
|
|
* guaranteed to run in process context without any locks held. Since
|
|
* errors are supposed to be rare, the overhead shouldn't kill us any
|
|
* more than the failing device will.
|
|
*/
|
|
schedule_work(&event->work);
|
|
return;
|
|
|
|
lost_event:
|
|
fserror_free_event(event);
|
|
lost:
|
|
if (inode)
|
|
pr_err_ratelimited(
|
|
"%s: lost file I/O error report for ino %lu type %u pos 0x%llx len 0x%llx error %d",
|
|
sb->s_id, inode->i_ino, type, pos, len, error);
|
|
else
|
|
pr_err_ratelimited(
|
|
"%s: lost filesystem error report for type %u error %d",
|
|
sb->s_id, type, error);
|
|
}
|
|
EXPORT_SYMBOL_GPL(fserror_report);
|
|
|
|
static int __init fserror_init(void)
|
|
{
|
|
return mempool_init_kmalloc_pool(&fserror_events_pool,
|
|
FSERROR_DEFAULT_EVENT_POOL_SIZE,
|
|
sizeof(struct fserror_event));
|
|
}
|
|
fs_initcall(fserror_init);
|