Metal: use "Managed" resource storage mode on discrete GPUs (#5625)

Discrete GPUs cannot use the "shared" storage mode. Using it anyway
currently results in undefined behavior, and I believe this is what
causes the "inverted" cells problem on Intel systems with discrete GPUs
(observed in discussion #5597).

This commit also sets the CPU cache mode to "write combined" for our
resources: the CPU writes to them but never reads them back, so this
hint lets Metal optimize them further.
pull/5632/head
Qwerasd 2025-02-07 13:49:39 -05:00 committed by GitHub
commit f95f636f1f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 193 additions and 44 deletions

View File

@ -182,15 +182,34 @@ pub const GPUState = struct {
/// This buffer is written exactly once so we can use it globally.
instance: InstanceBuffer, // MTLBuffer
/// The default storage mode to use for resources created with our device.
///
/// This is based on whether the device is a discrete GPU or not. Discrete
/// GPUs do not have unified memory and therefore do not support the
/// "shared" storage mode; for them we have to use the "managed" mode.
default_storage_mode: mtl.MTLResourceOptions.StorageMode,
pub fn init() !GPUState {
const device = try chooseDevice();
const queue = device.msgSend(objc.Object, objc.sel("newCommandQueue"), .{});
errdefer queue.release();
// We determine whether our device is a discrete GPU based on these:
// - We're on macOS (iOS, iPadOS, etc. are guaranteed to be integrated).
// - We're not on aarch64 (Apple Silicon, therefore integrated).
// - The device reports that it does not have unified memory.
const is_discrete =
builtin.target.os.tag == .macos and
builtin.target.cpu.arch != .aarch64 and
!device.getProperty(bool, "hasUnifiedMemory");
const default_storage_mode: mtl.MTLResourceOptions.StorageMode =
if (is_discrete) .managed else .shared;
var instance = try InstanceBuffer.initFill(device, &.{
0, 1, 3, // Top-left triangle
1, 2, 3, // Bottom-right triangle
});
}, .{ .storage_mode = default_storage_mode });
errdefer instance.deinit();
var result: GPUState = .{
@ -198,11 +217,12 @@ pub const GPUState = struct {
.queue = queue,
.instance = instance,
.frames = undefined,
.default_storage_mode = default_storage_mode,
};
// Initialize all of our frame state.
for (&result.frames) |*frame| {
frame.* = try FrameState.init(result.device);
frame.* = try FrameState.init(result.device, default_storage_mode);
}
return result;
@ -288,18 +308,47 @@ pub const FrameState = struct {
const CellBgBuffer = mtl_buffer.Buffer(mtl_shaders.CellBg);
const CellTextBuffer = mtl_buffer.Buffer(mtl_shaders.CellText);
pub fn init(device: objc.Object) !FrameState {
pub fn init(
device: objc.Object,
/// Storage mode for buffers and textures.
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !FrameState {
// Uniform buffer contains exactly 1 uniform struct. The
// uniform data will be undefined so this must be set before
// a frame is drawn.
var uniforms = try UniformBuffer.init(device, 1);
var uniforms = try UniformBuffer.init(
device,
1,
.{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
);
errdefer uniforms.deinit();
// Create the buffers for our vertex data. The preallocation size
// is likely too small but our first frame update will resize it.
var cells = try CellTextBuffer.init(device, 10 * 10);
var cells = try CellTextBuffer.init(
device,
10 * 10,
.{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
);
errdefer cells.deinit();
var cells_bg = try CellBgBuffer.init(device, 10 * 10);
var cells_bg = try CellBgBuffer.init(
device,
10 * 10,
.{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
);
errdefer cells_bg.deinit();
// Initialize our textures for our font atlas.
@ -307,13 +356,13 @@ pub const FrameState = struct {
.data = undefined,
.size = 8,
.format = .grayscale,
});
}, storage_mode);
errdefer grayscale.release();
const color = try initAtlasTexture(device, &.{
.data = undefined,
.size = 8,
.format = .rgba,
});
}, storage_mode);
errdefer color.release();
return .{
@ -1215,7 +1264,11 @@ pub fn updateFrame(
.replace_gray_alpha,
.replace_rgb,
.replace_rgba,
=> try kv.value_ptr.image.upload(self.alloc, self.gpu_state.device),
=> try kv.value_ptr.image.upload(
self.alloc,
self.gpu_state.device,
self.gpu_state.default_storage_mode,
),
.unload_pending,
.unload_replace,
@ -1283,7 +1336,12 @@ pub fn drawFrame(self: *Metal, surface: *apprt.Surface) !void {
self.font_grid.lock.lockShared();
defer self.font_grid.lock.unlockShared();
frame.grayscale_modified = self.font_grid.atlas_grayscale.modified.load(.monotonic);
try syncAtlasTexture(self.gpu_state.device, &self.font_grid.atlas_grayscale, &frame.grayscale);
try syncAtlasTexture(
self.gpu_state.device,
&self.font_grid.atlas_grayscale,
&frame.grayscale,
self.gpu_state.default_storage_mode,
);
}
texture: {
const modified = self.font_grid.atlas_color.modified.load(.monotonic);
@ -1291,7 +1349,12 @@ pub fn drawFrame(self: *Metal, surface: *apprt.Surface) !void {
self.font_grid.lock.lockShared();
defer self.font_grid.lock.unlockShared();
frame.color_modified = self.font_grid.atlas_color.modified.load(.monotonic);
try syncAtlasTexture(self.gpu_state.device, &self.font_grid.atlas_color, &frame.color);
try syncAtlasTexture(
self.gpu_state.device,
&self.font_grid.atlas_color,
&frame.color,
self.gpu_state.default_storage_mode,
);
}
// Command buffer (MTLCommandBuffer)
@ -1618,7 +1681,11 @@ fn drawImagePlacement(
@as(f32, @floatFromInt(p.width)),
@as(f32, @floatFromInt(p.height)),
},
}});
}}, .{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = self.gpu_state.default_storage_mode,
});
defer buf.deinit();
// Set our buffer
@ -3217,14 +3284,20 @@ fn addPreeditCell(
/// Sync the atlas data to the given texture. This copies the bytes
/// associated with the atlas to the given texture. If the atlas no longer
/// fits into the texture, the texture will be resized.
fn syncAtlasTexture(device: objc.Object, atlas: *const font.Atlas, texture: *objc.Object) !void {
fn syncAtlasTexture(
device: objc.Object,
atlas: *const font.Atlas,
texture: *objc.Object,
/// Storage mode for the MTLTexture object
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !void {
const width = texture.getProperty(c_ulong, "width");
if (atlas.size > width) {
// Free our old texture
texture.*.release();
// Reallocate
texture.* = try initAtlasTexture(device, atlas);
texture.* = try initAtlasTexture(device, atlas, storage_mode);
}
texture.msgSend(
@ -3247,7 +3320,12 @@ fn syncAtlasTexture(device: objc.Object, atlas: *const font.Atlas, texture: *obj
}
/// Initialize a MTLTexture object for the given atlas.
fn initAtlasTexture(device: objc.Object, atlas: *const font.Atlas) !objc.Object {
fn initAtlasTexture(
device: objc.Object,
atlas: *const font.Atlas,
/// Storage mode for the MTLTexture object
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !objc.Object {
// Determine our pixel format
const pixel_format: mtl.MTLPixelFormat = switch (atlas.format) {
.grayscale => .r8unorm,
@ -3268,15 +3346,14 @@ fn initAtlasTexture(device: objc.Object, atlas: *const font.Atlas) !objc.Object
desc.setProperty("width", @as(c_ulong, @intCast(atlas.size)));
desc.setProperty("height", @as(c_ulong, @intCast(atlas.size)));
// Xcode tells us that this texture should be shared mode on
// aarch64. This configuration is not supported on x86_64 so
// we only set it on aarch64.
if (comptime builtin.target.cpu.arch == .aarch64) {
desc.setProperty(
"storageMode",
@as(c_ulong, mtl.MTLResourceStorageModeShared),
);
}
desc.setProperty(
"resourceOptions",
mtl.MTLResourceOptions{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
);
// Initialize
const id = device.msgSend(

View File

@ -24,12 +24,36 @@ pub const MTLStoreAction = enum(c_ulong) {
store = 1,
};
/// https://developer.apple.com/documentation/metal/mtlstoragemode?language=objc
pub const MTLStorageMode = enum(c_ulong) {
shared = 0,
managed = 1,
private = 2,
memoryless = 3,
/// https://developer.apple.com/documentation/metal/mtlresourceoptions?language=objc
///
/// Resource options expressed as a packed struct backed by `c_ulong`, so a
/// value of this type has the same size as the raw integer option mask.
///
/// Packed struct fields are laid out starting at the least significant bit,
/// which gives the following layout:
///   - bits 0-3: CPU cache mode
///   - bits 4-7: storage mode
///   - bits 8-9: hazard tracking mode
///   - remaining bits: padding, always zero
pub const MTLResourceOptions = packed struct(c_ulong) {
/// https://developer.apple.com/documentation/metal/mtlcpucachemode?language=objc
cpu_cache_mode: CPUCacheMode = .default,
/// https://developer.apple.com/documentation/metal/mtlstoragemode?language=objc
///
/// This field has no default, so every initializer must pick a storage
/// mode explicitly.
storage_mode: StorageMode,
/// https://developer.apple.com/documentation/metal/mtlhazardtrackingmode?language=objc
hazard_tracking_mode: HazardTrackingMode = .default,
/// Unused high bits. The width is computed so that the three fields above
/// (4 + 4 + 2 = 10 bits) plus this padding exactly fill a `c_ulong`.
_pad: @Type(.{
.Int = .{ .signedness = .unsigned, .bits = @bitSizeOf(c_ulong) - 10 },
}) = 0,
/// 4-bit value stored in the lowest bits of the options.
pub const CPUCacheMode = enum(u4) {
default = 0,
write_combined = 1,
};
/// 4-bit value stored directly above the CPU cache mode.
pub const StorageMode = enum(u4) {
shared = 0,
managed = 1,
private = 2,
memoryless = 3,
};
/// 2-bit value stored directly above the storage mode.
pub const HazardTrackingMode = enum(u2) {
default = 0,
untracked = 1,
tracked = 2,
};
};
/// https://developer.apple.com/documentation/metal/mtlprimitivetype?language=objc
@ -139,10 +163,6 @@ pub const MTLTextureUsage = enum(c_ulong) {
pixel_format_view = 8,
};
/// https://developer.apple.com/documentation/metal/mtlresourceoptions?language=objc
/// (incomplete, we only use this mode so we just hardcode it)
pub const MTLResourceStorageModeShared: c_ulong = @intFromEnum(MTLStorageMode.shared) << 4;
pub const MTLClearColor = extern struct {
red: f64,
green: f64,

View File

@ -2,6 +2,7 @@ const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const objc = @import("objc");
const macos = @import("macos");
const mtl = @import("api.zig");
@ -14,35 +15,46 @@ pub fn Buffer(comptime T: type) type {
return struct {
const Self = @This();
/// The resource options for this buffer.
options: mtl.MTLResourceOptions,
buffer: objc.Object, // MTLBuffer
/// Initialize a buffer with the given length pre-allocated.
pub fn init(device: objc.Object, len: usize) !Self {
pub fn init(
device: objc.Object,
len: usize,
options: mtl.MTLResourceOptions,
) !Self {
const buffer = device.msgSend(
objc.Object,
objc.sel("newBufferWithLength:options:"),
.{
@as(c_ulong, @intCast(len * @sizeOf(T))),
mtl.MTLResourceStorageModeShared,
options,
},
);
return .{ .buffer = buffer };
return .{ .buffer = buffer, .options = options };
}
/// Init the buffer filled with the given data.
pub fn initFill(device: objc.Object, data: []const T) !Self {
pub fn initFill(
device: objc.Object,
data: []const T,
options: mtl.MTLResourceOptions,
) !Self {
const buffer = device.msgSend(
objc.Object,
objc.sel("newBufferWithBytes:length:options:"),
.{
@as(*const anyopaque, @ptrCast(data.ptr)),
@as(c_ulong, @intCast(data.len * @sizeOf(T))),
mtl.MTLResourceStorageModeShared,
options,
},
);
return .{ .buffer = buffer };
return .{ .buffer = buffer, .options = options };
}
pub fn deinit(self: *Self) void {
@ -85,7 +97,7 @@ pub fn Buffer(comptime T: type) type {
objc.sel("newBufferWithLength:options:"),
.{
@as(c_ulong, @intCast(size * @sizeOf(T))),
mtl.MTLResourceStorageModeShared,
self.options,
},
);
}
@ -106,6 +118,18 @@ pub fn Buffer(comptime T: type) type {
};
@memcpy(dst, src);
// If we're using the managed resource storage mode, then
// we need to signal Metal to synchronize the buffer data.
//
// Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos?language=objc
if (self.options.storage_mode == .managed) {
self.buffer.msgSend(
void,
"didModifyRange:",
.{macos.foundation.Range.init(0, req_bytes)},
);
}
}
/// Like Buffer.sync but takes data from an array of ArrayLists,
@ -130,7 +154,7 @@ pub fn Buffer(comptime T: type) type {
objc.sel("newBufferWithLength:options:"),
.{
@as(c_ulong, @intCast(size * @sizeOf(T))),
mtl.MTLResourceStorageModeShared,
self.options,
},
);
}
@ -153,6 +177,18 @@ pub fn Buffer(comptime T: type) type {
i += list.items.len * @sizeOf(T);
}
// If we're using the managed resource storage mode, then
// we need to signal Metal to synchronize the buffer data.
//
// Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos?language=objc
if (self.options.storage_mode == .managed) {
self.buffer.msgSend(
void,
"didModifyRange:",
.{macos.foundation.Range.init(0, req_bytes)},
);
}
return total_len;
}
};

View File

@ -358,6 +358,8 @@ pub const Image = union(enum) {
self: *Image,
alloc: Allocator,
device: objc.Object,
/// Storage mode for the MTLTexture object
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !void {
// Convert our data if we have to
try self.convert(alloc);
@ -366,7 +368,7 @@ pub const Image = union(enum) {
const p = self.pending().?;
// Create our texture
const texture = try initTexture(p, device);
const texture = try initTexture(p, device, storage_mode);
errdefer texture.msgSend(void, objc.sel("release"), .{});
// Upload our data
@ -424,7 +426,12 @@ pub const Image = union(enum) {
};
}
fn initTexture(p: Pending, device: objc.Object) !objc.Object {
fn initTexture(
p: Pending,
device: objc.Object,
/// Storage mode for the MTLTexture object
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !objc.Object {
// Create our descriptor
const desc = init: {
const Class = objc.getClass("MTLTextureDescriptor").?;
@ -438,6 +445,15 @@ pub const Image = union(enum) {
desc.setProperty("width", @as(c_ulong, @intCast(p.width)));
desc.setProperty("height", @as(c_ulong, @intCast(p.height)));
desc.setProperty(
"resourceOptions",
mtl.MTLResourceOptions{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
);
// Initialize
const id = device.msgSend(
?*anyopaque,