macOS: Set LANGUAGE env var based on macOS preferred language list (#6628)

Sets the LANGUAGE environment variable based on the preferred languages
as reported by NSLocale.

macOS has a concept of preferred languages separate from the system
locale. The set of preferred languages is a list in priority order of
what translations the user prefers. A user can have, for example,
"fr_FR" as their locale but "en" as their preferred language. This would
mean that they want to use French units, date formats, etc. but they
prefer English translations.

gettext uses the LANGUAGE environment variable to override only
translations and a priority order can be specified by separating the
languages with colons. For example, "en:fr" would mean that English
translations are preferred but if they are not available then French
translations should be used.

To further complicate things, Apple reports the languages in BCP-47
format which is not compatible with gettext's POSIX locale format so we
have to canonicalize them. To canonicalize the languages we use an
internal function from libintl. This isn't normally available but since
we compile from source on macOS we can use it. This isn't necessary for
other platforms.

This logic only runs if the user didn't explicitly request a specific
locale, so in practice it should only affect macOS app launches. When
launched from the CLI, the environment will already have a locale unless
the user has deliberately cleared it.
pull/6622/head
Mitchell Hashimoto 2025-03-08 14:36:58 -08:00 committed by GitHub
commit 5efa2a6ca1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 169 additions and 11 deletions

View File

@ -81,6 +81,46 @@ pub fn _(msgid: [*:0]const u8) [*:0]const u8 {
return dgettext(build_config.bundle_id, msgid);
}
/// Rewrite a platform-specific locale name (for example, a BCP-47 tag)
/// as a POSIX-compliant locale name.
///
/// This wraps the unexported gnulib-lib routine inside gettext that
/// already implements the conversion. That routine edits its argument
/// in place and performs no bounds checking, so `locale` is first copied
/// into `buf` and the routine is run on that copy. The extra copy costs
/// a little, but this is not a hot path.
///
/// Requirements on `buf`:
///   - at least 16 bytes, so the longest hardcoded locale name fits;
///   - at least `locale.len + 1` bytes, so an unchanged locale and its
///     null terminator fit.
///
/// Returns `error.NoSpaceLeft` when `buf` does not meet both
/// requirements. On success, the returned null-terminated slice points
/// into `buf`.
///
/// The macOS canonicalization logic lives here (other platforms have
/// their own):
///
/// https://github.com/coreutils/gnulib/blob/5b92dd0a45c8d27f13a21076b57095ea5e220870/lib/localename.c#L1171
pub fn canonicalizeLocale(
    buf: []u8,
    locale: []const u8,
) error{NoSpaceLeft}![:0]const u8 {
    // Room for the input plus its terminator, and never fewer than 16 bytes.
    const required_len = @max(16, locale.len + 1);
    if (buf.len < required_len) return error.NoSpaceLeft;

    // Stage a null-terminated copy of the input; the C routine
    // rewrites it in place.
    const staged = buf[0 .. locale.len + 1];
    @memcpy(staged[0..locale.len], locale);
    staged[locale.len] = 0;

    _libintl_locale_name_canonicalize(staged[0..locale.len :0]);

    // The result may be shorter or longer than the input, so locate the
    // terminator in the full buffer and reslice from `buf` so that the
    // sentinel is part of the returned slice type.
    const end = std.mem.indexOfScalar(u8, buf, 0) orelse buf.len;
    return buf[0..end :0];
}
/// This can be called at any point a compile-time-known locale is
/// available. This will use comptime to verify the locale is supported.
pub fn staticLocale(comptime v: [*:0]const u8) [*:0]const u8 {
@ -100,3 +140,23 @@ pub fn staticLocale(comptime v: [*:0]const u8) [*:0]const u8 {
// C functions resolved at link time. `dgettext` is what the `_`
// translation helper in this file calls.
extern fn bindtextdomain(domainname: [*:0]const u8, dirname: [*:0]const u8) ?[*:0]const u8;
extern fn textdomain(domainname: [*:0]const u8) ?[*:0]const u8;
extern fn dgettext(domainname: [*:0]const u8, msgid: [*:0]const u8) [*:0]const u8;
// This is only available if we're building libintl from source
// since it's otherwise not exported. We only need it on macOS
// currently but probably will on Windows as well.
//
// The routine rewrites `name` in place, which is why the pointer
// is mutable.
extern fn _libintl_locale_name_canonicalize(name: [*:0]u8) void;
test "canonicalizeLocale darwin" {
    // The canonicalization under test is Darwin-specific.
    if (!builtin.target.isDarwin()) return error.SkipZigTest;

    const Case = struct {
        input: []const u8,
        expected: []const u8,
    };
    const cases = [_]Case{
        .{ .input = "en_US", .expected = "en_US" },
        .{ .input = "zh-Hans", .expected = "zh_CN" },
        .{ .input = "zh-Hant", .expected = "zh_TW" },

        // Edge case worth pinning down: canonicalizeLocale does not
        // understand encodings and turns the dash into an underscore.
        // Callers should strip the encoding before calling it.
        .{ .input = "en_US.UTF-8", .expected = "en_US.UTF_8" },
    };

    var scratch: [256]u8 = undefined;
    for (cases) |case| {
        try std.testing.expectEqualStrings(
            case.expected,
            try canonicalizeLocale(&scratch, case.input),
        );
    }
}

View File

@ -91,7 +91,8 @@ fn setLangFromCocoa() void {
const z_lang = std.mem.sliceTo(c_lang, 0);
const z_country = std.mem.sliceTo(c_country, 0);
// Format them into a buffer
// Format our locale as "<lang>_<country>.UTF-8" and set it as LANG.
{
var buf: [128]u8 = undefined;
const env_value = std.fmt.bufPrintZ(&buf, "{s}_{s}.UTF-8", .{ z_lang, z_country }) catch |err| {
log.warn("error setting locale from system. err={}", .{err});
@ -106,6 +107,99 @@ fn setLangFromCocoa() void {
}
}
// Get our preferred languages and set that to the LANGUAGE
// env var in case our language differs from our locale.
var buf: [1024]u8 = undefined;
if (preferredLanguageFromCocoa(&buf, NSLocale)) |pref_| {
if (pref_) |pref| {
log.debug(
"setting LANGUAGE from preferred languages value={s}",
.{pref},
);
_ = internal_os.setenv("LANGUAGE", pref);
}
} else |err| {
log.warn("error getting preferred languages. err={}", .{err});
}
}
/// Builds the value for the LANGUAGE environment variable from the
/// preferred languages reported by NSLocale. The caller is responsible
/// for actually setting the variable.
///
/// macOS tracks preferred languages separately from the system locale.
/// The preferred languages form a priority-ordered list of the
/// translations the user wants. For example, a user may have "fr_FR" as
/// their locale but "en" as their preferred language: French units and
/// date formats, but English translations.
///
/// gettext reads LANGUAGE to override translations only, and accepts a
/// colon-separated priority list. "en:fr" means English translations
/// are preferred, falling back to French when English is unavailable.
///
/// Apple reports languages in BCP-47 format, which gettext's POSIX
/// locale format does not accept, so every entry is canonicalized and
/// then suffixed with ".UTF-8".
///
/// The result is written into `buf` and returned as a null-terminated
/// slice of it. Returns null when no language was written, and
/// `error.NoSpaceLeft` when `buf` is too small for the full list.
fn preferredLanguageFromCocoa(
    buf: []u8,
    NSLocale: objc.Class,
) error{NoSpaceLeft}!?[:0]const u8 {
    var stream = std.io.fixedBufferStream(buf);
    const out = stream.writer();

    // Ask for the app's preferred languages. These may not match the
    // system locale (NSLocale.currentLocale).
    const ns_languages = NSLocale.msgSend(
        objc.Object,
        objc.sel("preferredLanguages"),
        .{},
    );
    const languages: *macos.foundation.Array = @ptrCast(ns_languages.value);

    const count = languages.getCount();
    for (0..count) |index| {
        var utf8_buf: [255:0]u8 = undefined;
        const entry = languages.getValueAtIndex(macos.foundation.String, index);
        const tag = entry.cstring(&utf8_buf, .utf8) orelse {
            // This is not expected to happen, but log it so that it is
            // visible if a user reports translation problems.
            log.warn("failed to convert a preferred language to UTF-8", .{});
            continue;
        };

        // Entries after the first are separated by a colon.
        if (stream.pos != 0) {
            out.writeByte(':') catch return error.NoSpaceLeft;
        }

        // Canonicalize the BCP-47 tag directly into the unused tail of
        // the output buffer, then advance past what was produced.
        const remaining = stream.buffer[stream.pos..];
        const posix_name = try i18n.canonicalizeLocale(remaining, tag);
        stream.seekBy(@intCast(posix_name.len)) catch unreachable;

        // The canonical name never carries an encoding, and all of our
        // translations require UTF-8, so append it explicitly.
        out.writeAll(".UTF-8") catch return error.NoSpaceLeft;
    }

    // Nothing was written, so there is no value to report.
    if (stream.pos == 0) return null;

    // Terminate the string inside the buffer.
    out.writeByte(0) catch return error.NoSpaceLeft;

    // The written region includes the terminator; reslice so the
    // terminator becomes the sentinel instead of part of the length.
    const written = stream.getWritten();
    const text_len = written.len - 1;
    return written[0..text_len :0];
}
// Values copied by hand from the C header instead of being imported.
// NOTE(review): the numeric values of these constants are defined by
// the C library and can differ between platforms — confirm they match
// the locale.h of every target this file is built for.
const LC_ALL: c_int = 6; // from locale.h
const LC_ALL_MASK: c_int = 0x7fffffff; // from locale.h
// Represented here as a nullable untyped pointer.
const locale_t = ?*anyopaque;

View File

@ -52,3 +52,7 @@ pub const OpenType = openpkg.Type;
pub const pipe = pipepkg.pipe;
pub const resourcesDir = resourcesdir.resourcesDir;
pub const ShellEscapeWriter = shell.ShellEscapeWriter;
// Reference the i18n module from a test block so that the tests it
// declares are picked up when this file's tests are run.
test {
_ = i18n;
}