Metal: use "Managed" resource storage mode on discrete GPUs (#5625)
Discrete GPUs cannot use the "shared" storage mode. Using it currently causes undefined behavior, and I believe it is the cause of the "inverted" cells problem seen on Intel systems with discrete GPUs (observed in discussion #5597). This commit also sets the CPU cache mode to "write combined" for our resources: since we never read them back, this hint lets Metal optimize them further.
pull/5632/head
commit
f95f636f1f
|
|
@ -182,15 +182,34 @@ pub const GPUState = struct {
|
|||
/// This buffer is written exactly once so we can use it globally.
|
||||
instance: InstanceBuffer, // MTLBuffer
|
||||
|
||||
/// The default storage mode to use for resources created with our device.
|
||||
///
|
||||
/// This is based on whether the device is a discrete GPU or not, since
|
||||
/// discrete GPUs do not have unified memory and therefore do not support
|
||||
/// the "shared" storage mode; instead we have to use the "managed" mode.
|
||||
default_storage_mode: mtl.MTLResourceOptions.StorageMode,
|
||||
|
||||
pub fn init() !GPUState {
|
||||
const device = try chooseDevice();
|
||||
const queue = device.msgSend(objc.Object, objc.sel("newCommandQueue"), .{});
|
||||
errdefer queue.release();
|
||||
|
||||
// We determine whether our device is a discrete GPU based on these:
|
||||
// - We're on macOS (iOS, iPadOS, etc. are guaranteed to be integrated).
|
||||
// - We're not on aarch64 (Apple Silicon, therefore integrated).
|
||||
// - The device reports that it does not have unified memory.
|
||||
const is_discrete =
|
||||
builtin.target.os.tag == .macos and
|
||||
builtin.target.cpu.arch != .aarch64 and
|
||||
!device.getProperty(bool, "hasUnifiedMemory");
|
||||
|
||||
const default_storage_mode: mtl.MTLResourceOptions.StorageMode =
|
||||
if (is_discrete) .managed else .shared;
|
||||
|
||||
var instance = try InstanceBuffer.initFill(device, &.{
|
||||
0, 1, 3, // Top-left triangle
|
||||
1, 2, 3, // Bottom-right triangle
|
||||
});
|
||||
}, .{ .storage_mode = default_storage_mode });
|
||||
errdefer instance.deinit();
|
||||
|
||||
var result: GPUState = .{
|
||||
|
|
@ -198,11 +217,12 @@ pub const GPUState = struct {
|
|||
.queue = queue,
|
||||
.instance = instance,
|
||||
.frames = undefined,
|
||||
.default_storage_mode = default_storage_mode,
|
||||
};
|
||||
|
||||
// Initialize all of our frame state.
|
||||
for (&result.frames) |*frame| {
|
||||
frame.* = try FrameState.init(result.device);
|
||||
frame.* = try FrameState.init(result.device, default_storage_mode);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
@ -288,18 +308,47 @@ pub const FrameState = struct {
|
|||
const CellBgBuffer = mtl_buffer.Buffer(mtl_shaders.CellBg);
|
||||
const CellTextBuffer = mtl_buffer.Buffer(mtl_shaders.CellText);
|
||||
|
||||
pub fn init(device: objc.Object) !FrameState {
|
||||
pub fn init(
|
||||
device: objc.Object,
|
||||
/// Storage mode for buffers and textures.
|
||||
storage_mode: mtl.MTLResourceOptions.StorageMode,
|
||||
) !FrameState {
|
||||
// Uniform buffer contains exactly 1 uniform struct. The
|
||||
// uniform data will be undefined so this must be set before
|
||||
// a frame is drawn.
|
||||
var uniforms = try UniformBuffer.init(device, 1);
|
||||
var uniforms = try UniformBuffer.init(
|
||||
device,
|
||||
1,
|
||||
.{
|
||||
// Indicate that the CPU writes to this resource but never reads it.
|
||||
.cpu_cache_mode = .write_combined,
|
||||
.storage_mode = storage_mode,
|
||||
},
|
||||
);
|
||||
errdefer uniforms.deinit();
|
||||
|
||||
// Create the buffers for our vertex data. The preallocation size
|
||||
// is likely too small but our first frame update will resize it.
|
||||
var cells = try CellTextBuffer.init(device, 10 * 10);
|
||||
var cells = try CellTextBuffer.init(
|
||||
device,
|
||||
10 * 10,
|
||||
.{
|
||||
// Indicate that the CPU writes to this resource but never reads it.
|
||||
.cpu_cache_mode = .write_combined,
|
||||
.storage_mode = storage_mode,
|
||||
},
|
||||
);
|
||||
errdefer cells.deinit();
|
||||
var cells_bg = try CellBgBuffer.init(device, 10 * 10);
|
||||
var cells_bg = try CellBgBuffer.init(
|
||||
device,
|
||||
10 * 10,
|
||||
.{
|
||||
// Indicate that the CPU writes to this resource but never reads it.
|
||||
.cpu_cache_mode = .write_combined,
|
||||
.storage_mode = storage_mode,
|
||||
},
|
||||
);
|
||||
|
||||
errdefer cells_bg.deinit();
|
||||
|
||||
// Initialize our textures for our font atlas.
|
||||
|
|
@ -307,13 +356,13 @@ pub const FrameState = struct {
|
|||
.data = undefined,
|
||||
.size = 8,
|
||||
.format = .grayscale,
|
||||
});
|
||||
}, storage_mode);
|
||||
errdefer grayscale.release();
|
||||
const color = try initAtlasTexture(device, &.{
|
||||
.data = undefined,
|
||||
.size = 8,
|
||||
.format = .rgba,
|
||||
});
|
||||
}, storage_mode);
|
||||
errdefer color.release();
|
||||
|
||||
return .{
|
||||
|
|
@ -1215,7 +1264,11 @@ pub fn updateFrame(
|
|||
.replace_gray_alpha,
|
||||
.replace_rgb,
|
||||
.replace_rgba,
|
||||
=> try kv.value_ptr.image.upload(self.alloc, self.gpu_state.device),
|
||||
=> try kv.value_ptr.image.upload(
|
||||
self.alloc,
|
||||
self.gpu_state.device,
|
||||
self.gpu_state.default_storage_mode,
|
||||
),
|
||||
|
||||
.unload_pending,
|
||||
.unload_replace,
|
||||
|
|
@ -1283,7 +1336,12 @@ pub fn drawFrame(self: *Metal, surface: *apprt.Surface) !void {
|
|||
self.font_grid.lock.lockShared();
|
||||
defer self.font_grid.lock.unlockShared();
|
||||
frame.grayscale_modified = self.font_grid.atlas_grayscale.modified.load(.monotonic);
|
||||
try syncAtlasTexture(self.gpu_state.device, &self.font_grid.atlas_grayscale, &frame.grayscale);
|
||||
try syncAtlasTexture(
|
||||
self.gpu_state.device,
|
||||
&self.font_grid.atlas_grayscale,
|
||||
&frame.grayscale,
|
||||
self.gpu_state.default_storage_mode,
|
||||
);
|
||||
}
|
||||
texture: {
|
||||
const modified = self.font_grid.atlas_color.modified.load(.monotonic);
|
||||
|
|
@ -1291,7 +1349,12 @@ pub fn drawFrame(self: *Metal, surface: *apprt.Surface) !void {
|
|||
self.font_grid.lock.lockShared();
|
||||
defer self.font_grid.lock.unlockShared();
|
||||
frame.color_modified = self.font_grid.atlas_color.modified.load(.monotonic);
|
||||
try syncAtlasTexture(self.gpu_state.device, &self.font_grid.atlas_color, &frame.color);
|
||||
try syncAtlasTexture(
|
||||
self.gpu_state.device,
|
||||
&self.font_grid.atlas_color,
|
||||
&frame.color,
|
||||
self.gpu_state.default_storage_mode,
|
||||
);
|
||||
}
|
||||
|
||||
// Command buffer (MTLCommandBuffer)
|
||||
|
|
@ -1618,7 +1681,11 @@ fn drawImagePlacement(
|
|||
@as(f32, @floatFromInt(p.width)),
|
||||
@as(f32, @floatFromInt(p.height)),
|
||||
},
|
||||
}});
|
||||
}}, .{
|
||||
// Indicate that the CPU writes to this resource but never reads it.
|
||||
.cpu_cache_mode = .write_combined,
|
||||
.storage_mode = self.gpu_state.default_storage_mode,
|
||||
});
|
||||
defer buf.deinit();
|
||||
|
||||
// Set our buffer
|
||||
|
|
@ -3217,14 +3284,20 @@ fn addPreeditCell(
|
|||
/// Sync the atlas data to the given texture. This copies the bytes
|
||||
/// associated with the atlas to the given texture. If the atlas no longer
|
||||
/// fits into the texture, the texture will be resized.
|
||||
fn syncAtlasTexture(device: objc.Object, atlas: *const font.Atlas, texture: *objc.Object) !void {
|
||||
fn syncAtlasTexture(
|
||||
device: objc.Object,
|
||||
atlas: *const font.Atlas,
|
||||
texture: *objc.Object,
|
||||
/// Storage mode for the MTLTexture object
|
||||
storage_mode: mtl.MTLResourceOptions.StorageMode,
|
||||
) !void {
|
||||
const width = texture.getProperty(c_ulong, "width");
|
||||
if (atlas.size > width) {
|
||||
// Free our old texture
|
||||
texture.*.release();
|
||||
|
||||
// Reallocate
|
||||
texture.* = try initAtlasTexture(device, atlas);
|
||||
texture.* = try initAtlasTexture(device, atlas, storage_mode);
|
||||
}
|
||||
|
||||
texture.msgSend(
|
||||
|
|
@ -3247,7 +3320,12 @@ fn syncAtlasTexture(device: objc.Object, atlas: *const font.Atlas, texture: *obj
|
|||
}
|
||||
|
||||
/// Initialize a MTLTexture object for the given atlas.
|
||||
fn initAtlasTexture(device: objc.Object, atlas: *const font.Atlas) !objc.Object {
|
||||
fn initAtlasTexture(
|
||||
device: objc.Object,
|
||||
atlas: *const font.Atlas,
|
||||
/// Storage mode for the MTLTexture object
|
||||
storage_mode: mtl.MTLResourceOptions.StorageMode,
|
||||
) !objc.Object {
|
||||
// Determine our pixel format
|
||||
const pixel_format: mtl.MTLPixelFormat = switch (atlas.format) {
|
||||
.grayscale => .r8unorm,
|
||||
|
|
@ -3268,15 +3346,14 @@ fn initAtlasTexture(device: objc.Object, atlas: *const font.Atlas) !objc.Object
|
|||
desc.setProperty("width", @as(c_ulong, @intCast(atlas.size)));
|
||||
desc.setProperty("height", @as(c_ulong, @intCast(atlas.size)));
|
||||
|
||||
// Xcode tells us that this texture should be shared mode on
|
||||
// aarch64. This configuration is not supported on x86_64 so
|
||||
// we only set it on aarch64.
|
||||
if (comptime builtin.target.cpu.arch == .aarch64) {
|
||||
desc.setProperty(
|
||||
"storageMode",
|
||||
@as(c_ulong, mtl.MTLResourceStorageModeShared),
|
||||
);
|
||||
}
|
||||
desc.setProperty(
|
||||
"resourceOptions",
|
||||
mtl.MTLResourceOptions{
|
||||
// Indicate that the CPU writes to this resource but never reads it.
|
||||
.cpu_cache_mode = .write_combined,
|
||||
.storage_mode = storage_mode,
|
||||
},
|
||||
);
|
||||
|
||||
// Initialize
|
||||
const id = device.msgSend(
|
||||
|
|
|
|||
|
|
@ -24,12 +24,36 @@ pub const MTLStoreAction = enum(c_ulong) {
|
|||
store = 1,
|
||||
};
|
||||
|
||||
/// https://developer.apple.com/documentation/metal/mtlstoragemode?language=objc
|
||||
pub const MTLStorageMode = enum(c_ulong) {
|
||||
shared = 0,
|
||||
managed = 1,
|
||||
private = 2,
|
||||
memoryless = 3,
|
||||
/// Bit-packed resource options passed to Metal when creating buffers and
/// textures. The struct is backed by a `c_ulong`, so it can be handed to
/// Objective-C wherever an options integer is expected.
///
/// Packed struct fields are laid out starting from the least significant
/// bit, so the layout here is: bits 0-3 CPU cache mode, bits 4-7 storage
/// mode, bits 8-9 hazard tracking mode, remaining bits zero padding.
/// NOTE(review): intended to match Metal's own shifts for these fields;
/// confirm against the Metal headers if a field is ever added.
///
/// https://developer.apple.com/documentation/metal/mtlresourceoptions?language=objc
|
||||
pub const MTLResourceOptions = packed struct(c_ulong) {
|
||||
/// https://developer.apple.com/documentation/metal/mtlcpucachemode?language=objc
|
||||
cpu_cache_mode: CPUCacheMode = .default,
|
||||
/// No default: every call site must choose a storage mode explicitly.
/// https://developer.apple.com/documentation/metal/mtlstoragemode?language=objc
|
||||
storage_mode: StorageMode,
|
||||
/// https://developer.apple.com/documentation/metal/mtlhazardtrackingmode?language=objc
|
||||
hazard_tracking_mode: HazardTrackingMode = .default,
|
||||
|
||||
// Zero padding that fills the rest of the backing integer. The three
// fields above use 4 + 4 + 2 = 10 bits, hence the `- 10`.
_pad: @Type(.{
|
||||
.Int = .{ .signedness = .unsigned, .bits = @bitSizeOf(c_ulong) - 10 },
|
||||
}) = 0,
|
||||
|
||||
// 4-bit field; only the two values used here are declared.
pub const CPUCacheMode = enum(u4) {
|
||||
default = 0,
|
||||
write_combined = 1,
|
||||
};
|
||||
|
||||
// 4-bit field selecting the storage mode of the resource.
pub const StorageMode = enum(u4) {
|
||||
shared = 0,
|
||||
managed = 1,
|
||||
private = 2,
|
||||
memoryless = 3,
|
||||
};
|
||||
|
||||
// 2-bit field.
pub const HazardTrackingMode = enum(u2) {
|
||||
default = 0,
|
||||
untracked = 1,
|
||||
tracked = 2,
|
||||
};
|
||||
};
|
||||
|
||||
/// https://developer.apple.com/documentation/metal/mtlprimitivetype?language=objc
|
||||
|
|
@ -139,10 +163,6 @@ pub const MTLTextureUsage = enum(c_ulong) {
|
|||
pixel_format_view = 8,
|
||||
};
|
||||
|
||||
/// https://developer.apple.com/documentation/metal/mtlresourceoptions?language=objc
|
||||
/// (incomplete, we only use this mode so we just hardcode it)
|
||||
pub const MTLResourceStorageModeShared: c_ulong = @intFromEnum(MTLStorageMode.shared) << 4;
|
||||
|
||||
pub const MTLClearColor = extern struct {
|
||||
red: f64,
|
||||
green: f64,
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ const std = @import("std");
|
|||
const Allocator = std.mem.Allocator;
|
||||
const assert = std.debug.assert;
|
||||
const objc = @import("objc");
|
||||
const macos = @import("macos");
|
||||
|
||||
const mtl = @import("api.zig");
|
||||
|
||||
|
|
@ -14,35 +15,46 @@ pub fn Buffer(comptime T: type) type {
|
|||
return struct {
|
||||
const Self = @This();
|
||||
|
||||
/// The resource options for this buffer.
|
||||
options: mtl.MTLResourceOptions,
|
||||
|
||||
buffer: objc.Object, // MTLBuffer
|
||||
|
||||
/// Initialize a buffer with the given length pre-allocated.
|
||||
pub fn init(device: objc.Object, len: usize) !Self {
|
||||
pub fn init(
|
||||
device: objc.Object,
|
||||
len: usize,
|
||||
options: mtl.MTLResourceOptions,
|
||||
) !Self {
|
||||
const buffer = device.msgSend(
|
||||
objc.Object,
|
||||
objc.sel("newBufferWithLength:options:"),
|
||||
.{
|
||||
@as(c_ulong, @intCast(len * @sizeOf(T))),
|
||||
mtl.MTLResourceStorageModeShared,
|
||||
options,
|
||||
},
|
||||
);
|
||||
|
||||
return .{ .buffer = buffer };
|
||||
return .{ .buffer = buffer, .options = options };
|
||||
}
|
||||
|
||||
/// Init the buffer filled with the given data.
|
||||
pub fn initFill(device: objc.Object, data: []const T) !Self {
|
||||
pub fn initFill(
|
||||
device: objc.Object,
|
||||
data: []const T,
|
||||
options: mtl.MTLResourceOptions,
|
||||
) !Self {
|
||||
const buffer = device.msgSend(
|
||||
objc.Object,
|
||||
objc.sel("newBufferWithBytes:length:options:"),
|
||||
.{
|
||||
@as(*const anyopaque, @ptrCast(data.ptr)),
|
||||
@as(c_ulong, @intCast(data.len * @sizeOf(T))),
|
||||
mtl.MTLResourceStorageModeShared,
|
||||
options,
|
||||
},
|
||||
);
|
||||
|
||||
return .{ .buffer = buffer };
|
||||
return .{ .buffer = buffer, .options = options };
|
||||
}
|
||||
|
||||
pub fn deinit(self: *Self) void {
|
||||
|
|
@ -85,7 +97,7 @@ pub fn Buffer(comptime T: type) type {
|
|||
objc.sel("newBufferWithLength:options:"),
|
||||
.{
|
||||
@as(c_ulong, @intCast(size * @sizeOf(T))),
|
||||
mtl.MTLResourceStorageModeShared,
|
||||
self.options,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
|
@ -106,6 +118,18 @@ pub fn Buffer(comptime T: type) type {
|
|||
};
|
||||
|
||||
@memcpy(dst, src);
|
||||
|
||||
// If we're using the managed resource storage mode, then
|
||||
// we need to signal Metal to synchronize the buffer data.
|
||||
//
|
||||
// Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos?language=objc
|
||||
if (self.options.storage_mode == .managed) {
|
||||
self.buffer.msgSend(
|
||||
void,
|
||||
"didModifyRange:",
|
||||
.{macos.foundation.Range.init(0, req_bytes)},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Like Buffer.sync but takes data from an array of ArrayLists,
|
||||
|
|
@ -130,7 +154,7 @@ pub fn Buffer(comptime T: type) type {
|
|||
objc.sel("newBufferWithLength:options:"),
|
||||
.{
|
||||
@as(c_ulong, @intCast(size * @sizeOf(T))),
|
||||
mtl.MTLResourceStorageModeShared,
|
||||
self.options,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
|
@ -153,6 +177,18 @@ pub fn Buffer(comptime T: type) type {
|
|||
i += list.items.len * @sizeOf(T);
|
||||
}
|
||||
|
||||
// If we're using the managed resource storage mode, then
|
||||
// we need to signal Metal to synchronize the buffer data.
|
||||
//
|
||||
// Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos?language=objc
|
||||
if (self.options.storage_mode == .managed) {
|
||||
self.buffer.msgSend(
|
||||
void,
|
||||
"didModifyRange:",
|
||||
.{macos.foundation.Range.init(0, req_bytes)},
|
||||
);
|
||||
}
|
||||
|
||||
return total_len;
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -358,6 +358,8 @@ pub const Image = union(enum) {
|
|||
self: *Image,
|
||||
alloc: Allocator,
|
||||
device: objc.Object,
|
||||
/// Storage mode for the MTLTexture object
|
||||
storage_mode: mtl.MTLResourceOptions.StorageMode,
|
||||
) !void {
|
||||
// Convert our data if we have to
|
||||
try self.convert(alloc);
|
||||
|
|
@ -366,7 +368,7 @@ pub const Image = union(enum) {
|
|||
const p = self.pending().?;
|
||||
|
||||
// Create our texture
|
||||
const texture = try initTexture(p, device);
|
||||
const texture = try initTexture(p, device, storage_mode);
|
||||
errdefer texture.msgSend(void, objc.sel("release"), .{});
|
||||
|
||||
// Upload our data
|
||||
|
|
@ -424,7 +426,12 @@ pub const Image = union(enum) {
|
|||
};
|
||||
}
|
||||
|
||||
fn initTexture(p: Pending, device: objc.Object) !objc.Object {
|
||||
fn initTexture(
|
||||
p: Pending,
|
||||
device: objc.Object,
|
||||
/// Storage mode for the MTLTexture object
|
||||
storage_mode: mtl.MTLResourceOptions.StorageMode,
|
||||
) !objc.Object {
|
||||
// Create our descriptor
|
||||
const desc = init: {
|
||||
const Class = objc.getClass("MTLTextureDescriptor").?;
|
||||
|
|
@ -438,6 +445,15 @@ pub const Image = union(enum) {
|
|||
desc.setProperty("width", @as(c_ulong, @intCast(p.width)));
|
||||
desc.setProperty("height", @as(c_ulong, @intCast(p.height)));
|
||||
|
||||
desc.setProperty(
|
||||
"resourceOptions",
|
||||
mtl.MTLResourceOptions{
|
||||
// Indicate that the CPU writes to this resource but never reads it.
|
||||
.cpu_cache_mode = .write_combined,
|
||||
.storage_mode = storage_mode,
|
||||
},
|
||||
);
|
||||
|
||||
// Initialize
|
||||
const id = device.msgSend(
|
||||
?*anyopaque,
|
||||
|
|
|
|||
Loading…
Reference in New Issue