fs/nls: Fix utf16 to utf8 conversion

Currently, utf16s_to_utf8s(), the function responsible for converting
UTF-16 strings to UTF-8, ignores any characters that cannot be
converted. This, however, also includes multi-byte characters that do
not fit into the provided string buffer.

This can cause problems if such a multi-byte character is followed by
a single-byte character. In such a case, the multi-byte character might
be ignored because the provided string buffer is too small, while the
single-byte character might still fit and is thus copied into the
resulting string, leaving a character missing from the middle of the
output.

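Fix this by no longer filling the provided string buffer once a
character does not fit. In order to be able to do this, extend
utf32_to_utf8() to return useful errno codes instead of -1: -EILSEQ
for code points that cannot be encoded and -EOVERFLOW when the output
buffer is too small.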

Fixes: 74675a5850 ("NLS: update handling of Unicode")
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20251111131125.3379-2-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
pull/1354/merge
Armin Wolf 2025-11-11 14:11:22 +01:00 committed by Ilpo Järvinen
parent 39ae6c50e5
commit 25524b6190
No known key found for this signature in database
GPG Key ID: 59AC4F6153E5CE31
1 changed files with 12 additions and 4 deletions

View File

@ -94,7 +94,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxout)
l = u; l = u;
if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR) if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR)
return -1; return -EILSEQ;
nc = 0; nc = 0;
for (t = utf8_table; t->cmask && maxout; t++, maxout--) { for (t = utf8_table; t->cmask && maxout; t++, maxout--) {
@ -110,7 +110,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxout)
return nc; return nc;
} }
} }
return -1; return -EOVERFLOW;
} }
EXPORT_SYMBOL(utf32_to_utf8); EXPORT_SYMBOL(utf32_to_utf8);
@ -217,8 +217,16 @@ int utf16s_to_utf8s(const wchar_t *pwcs, int inlen, enum utf16_endian endian,
inlen--; inlen--;
} }
size = utf32_to_utf8(u, op, maxout); size = utf32_to_utf8(u, op, maxout);
if (size == -1) { if (size < 0) {
/* Ignore character and move on */ if (size == -EILSEQ) {
/* Ignore character and move on */
continue;
}
/*
* Stop filling the buffer with data once a character
* does not fit anymore.
*/
break;
} else { } else {
op += size; op += size;
maxout -= size; maxout -= size;