diff --git a/src/crash/main.zig b/src/crash/main.zig index 5f9aa96c5..1ac971851 100644 --- a/src/crash/main.zig +++ b/src/crash/main.zig @@ -5,7 +5,6 @@ const dir = @import("dir.zig"); const sentry_envelope = @import("sentry_envelope.zig"); -pub const minidump = @import("minidump.zig"); pub const sentry = @import("sentry.zig"); pub const Envelope = sentry_envelope.Envelope; pub const defaultDir = dir.defaultDir; diff --git a/src/crash/minidump.zig b/src/crash/minidump.zig deleted file mode 100644 index 0abd67eae..000000000 --- a/src/crash/minidump.zig +++ /dev/null @@ -1,7 +0,0 @@ -pub const reader = @import("minidump/reader.zig"); -pub const stream = @import("minidump/stream.zig"); -pub const Reader = reader.Reader; - -test { - @import("std").testing.refAllDecls(@This()); -} diff --git a/src/crash/minidump/external.zig b/src/crash/minidump/external.zig deleted file mode 100644 index 451810883..000000000 --- a/src/crash/minidump/external.zig +++ /dev/null @@ -1,59 +0,0 @@ -//! This file contains the external structs and constants for the minidump -//! format. Most are from the Microsoft documentation on the minidump format: -//! https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ -//! -//! Wherever possible, we also compare our definitions to other projects -//! such as rust-minidump, libmdmp, breakpad, etc. to ensure we're doing -//! the right thing. - -/// "MDMP" in little-endian. -pub const signature = 0x504D444D; - -/// The version of the minidump format. -pub const version = 0xA793; - -/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_header -pub const Header = extern struct { - signature: u32, - version: packed struct(u32) { low: u16, high: u16 }, - stream_count: u32, - stream_directory_rva: u32, - checksum: u32, - time_date_stamp: u32, - flags: u64, -}; - -/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_directory -pub const Directory = extern struct { - stream_type: u32, - location: LocationDescriptor, -}; - -/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_location_descriptor -pub const LocationDescriptor = extern struct { - data_size: u32, - rva: u32, -}; - -/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_memory_descriptor -pub const MemoryDescriptor = extern struct { - start_of_memory_range: u64, - memory: LocationDescriptor, -}; - -/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_thread_list -pub const ThreadList = extern struct { - number_of_threads: u32, - threads: [1]Thread, -}; - -/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_thread -pub const Thread = extern struct { - thread_id: u32, - suspend_count: u32, - priority_class: u32, - priority: u32, - teb: u64, - stack: MemoryDescriptor, - thread_context: LocationDescriptor, -}; diff --git a/src/crash/minidump/reader.zig b/src/crash/minidump/reader.zig deleted file mode 100644 index b7f5efe80..000000000 --- a/src/crash/minidump/reader.zig +++ /dev/null @@ -1,242 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; -const Allocator = std.mem.Allocator; -const external = @import("external.zig"); -const stream = @import("stream.zig"); -const EncodedStream = stream.EncodedStream; - -const log = std.log.scoped(.minidump_reader); - -/// Possible minidump-specific errors that can occur when reading a minidump. -/// This isn't the full error set since IO errors can also occur depending -/// on the Source type. -pub const ReadError = error{ - InvalidHeader, - InvalidVersion, - StreamSizeMismatch, -}; - -/// Reader creates a new minidump reader for the given source type. The -/// source must have both a "reader()" and "seekableStream()" function. -/// -/// Given the format of a minidump file, we must keep the source open and -/// continually access it because the format of the minidump is full of -/// pointers and offsets that we must follow depending on the stream types. -/// Also, since we're not aware of all stream types (in fact its impossible -/// to be aware since custom stream types are allowed), its possible any stream -/// type can define their own pointers and offsets. So, the source must always -/// be available so callers can decode the streams as needed. -pub fn Reader(comptime S: type) type { - return struct { - const Self = @This(); - - /// The source data. - source: Source, - - /// The endianness of the minidump file. This is detected by reading - /// the byte order of the header. - endian: std.builtin.Endian, - - /// The number of streams within the minidump file. This is read from - /// the header and stored here so we can quickly access them. Note - /// the stream types require reading the source; this is an optimization - /// to avoid any allocations on the reader and the caller can choose - /// to store them if they want. - stream_count: u32, - stream_directory_rva: u32, - - const SourceCallable = switch (@typeInfo(Source)) { - .pointer => |v| v.child, - .@"struct" => Source, - else => @compileError("Source type must be a pointer or struct"), - }; - - const SourceReader = @typeInfo(@TypeOf(SourceCallable.reader)).@"fn".return_type.?; - const SourceSeeker = @typeInfo(@TypeOf(SourceCallable.seekableStream)).@"fn".return_type.?; - - /// A limited reader for reading data from the source. - pub const LimitedReader = std.io.LimitedReader(SourceReader); - - /// The source type for the reader. - pub const Source = S; - - /// The stream types for reading - pub const ThreadList = stream.thread_list.ThreadListReader(Self); - - /// The reader type for stream reading. This has some other methods so - /// you must still call reader() on the result to get the actual - /// reader to read the data. - pub const StreamReader = struct { - source: Source, - endian: std.builtin.Endian, - directory: external.Directory, - - /// Should not be accessed directly. This is setup whenever - /// reader() is called. - limit_reader: LimitedReader = undefined, - - pub const Reader = LimitedReader.Reader; - - /// Returns a Reader implementation that reads the bytes of the - /// stream. - /// - /// The reader is dependent on the state of Source so any - /// state-changing operations on Source will invalidate the - /// reader. For example, making another reader, reading another - /// stream directory, closing the source, etc. - pub fn reader(self: *StreamReader) LimitedReader.Reader { - try self.source.seekableStream().seekTo(self.directory.location.rva); - self.limit_reader = .{ - .inner_reader = self.source.reader(), - .bytes_left = self.directory.location.data_size, - }; - return self.limit_reader.reader(); - } - - /// Seeks the source to the location of the directory. - pub fn seekToPayload(self: *StreamReader) !void { - try self.source.seekableStream().seekTo(self.directory.location.rva); - } - }; - - /// Iterator type to read over the streams in the minidump file. - pub const StreamIterator = struct { - reader: *const Self, - i: u32 = 0, - - pub fn next(self: *StreamIterator) !?StreamReader { - if (self.i >= self.reader.stream_count) return null; - const dir = try self.reader.directory(self.i); - self.i += 1; - return try self.reader.streamReader(dir); - } - }; - - /// Initialize a reader. The source must remain available for the entire - /// lifetime of the reader. The reader does not take ownership of the - /// source so if it has resources that need to be cleaned up, the caller - /// must do so once the reader is no longer needed. - pub fn init(source: Source) !Self { - const header, const endian = try readHeader(Source, source); - return .{ - .source = source, - .endian = endian, - .stream_count = header.stream_count, - .stream_directory_rva = header.stream_directory_rva, - }; - } - - /// Return an iterator to read over the streams in the minidump file. - /// This is very similar to using a simple for loop to stream_count - /// and calling directory() on each index, but is more idiomatic - /// Zig. - pub fn streamIterator(self: *const Self) StreamIterator { - return .{ .reader = self }; - } - - /// Return a StreamReader for the given directory type. This streams - /// from the underlying source so the returned reader is only valid - /// as long as the source is unmodified (i.e. the source is not - /// closed, the source seek position is not moved, etc.). - pub fn streamReader( - self: *const Self, - dir: external.Directory, - ) SourceSeeker.SeekError!StreamReader { - return .{ - .source = self.source, - .endian = self.endian, - .directory = dir, - }; - } - - /// Get the directory entry with the given index. - /// - /// Asserts the index is valid (idx < stream_count). - pub fn directory(self: *const Self, idx: usize) !external.Directory { - assert(idx < self.stream_count); - - // Seek to the directory. - const offset: u32 = @intCast(@sizeOf(external.Directory) * idx); - const rva: u32 = self.stream_directory_rva + offset; - try self.source.seekableStream().seekTo(rva); - - // Read the directory. - return try self.source.reader().readStructEndian( - external.Directory, - self.endian, - ); - } - - /// Return a reader for the given location descriptor. This is only - /// valid until the reader source is modified in some way. - pub fn locationReader( - self: *const Self, - loc: external.LocationDescriptor, - ) !LimitedReader { - try self.source.seekableStream().seekTo(loc.rva); - return .{ - .inner_reader = self.source.reader(), - .bytes_left = loc.data_size, - }; - } - }; -} - -/// Reads the header for the minidump file and returns endianness of -/// the file. -fn readHeader(comptime T: type, source: T) !struct { - external.Header, - std.builtin.Endian, -} { - // Start by trying LE. - var endian: std.builtin.Endian = .little; - var header = try source.reader().readStructEndian(external.Header, endian); - - // If the signature doesn't match, we assume its BE. - if (header.signature != external.signature) { - // Seek back to the start of the file so we can reread. - try source.seekableStream().seekTo(0); - - // Try BE, if the signature doesn't match, return an error. - endian = .big; - header = try source.reader().readStructEndian(external.Header, endian); - if (header.signature != external.signature) return ReadError.InvalidHeader; - } - - // "The low-order word is MINIDUMP_VERSION. The high-order word is an - // internal value that is implementation specific." - if (header.version.low != external.version) return ReadError.InvalidVersion; - - return .{ header, endian }; -} - -// Uncomment to dump some debug information for a minidump file. -test "minidump debug" { - var fbs = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp")); - const r = try Reader(*@TypeOf(fbs)).init(&fbs); - var it = r.streamIterator(); - while (try it.next()) |s| { - log.warn("directory i={} dir={}", .{ it.i - 1, s.directory }); - } -} - -test "minidump read" { - const testing = std.testing; - const alloc = testing.allocator; - - var fbs = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp")); - const r = try Reader(*@TypeOf(fbs)).init(&fbs); - try testing.expectEqual(std.builtin.Endian.little, r.endian); - try testing.expectEqual(7, r.stream_count); - { - const dir = try r.directory(0); - try testing.expectEqual(3, dir.stream_type); - try testing.expectEqual(584, dir.location.data_size); - - var bytes = std.ArrayList(u8).init(alloc); - defer bytes.deinit(); - var sr = try r.streamReader(dir); - try sr.reader().readAllArrayList(&bytes, std.math.maxInt(usize)); - try testing.expectEqual(584, bytes.items.len); - } -} diff --git a/src/crash/minidump/stream.zig b/src/crash/minidump/stream.zig deleted file mode 100644 index 00ec6b042..000000000 --- a/src/crash/minidump/stream.zig +++ /dev/null @@ -1,30 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; -const Allocator = std.mem.Allocator; - -const log = std.log.scoped(.minidump_stream); - -/// The known stream types. -pub const thread_list = @import("stream_threadlist.zig"); - -/// A stream within the minidump file. A stream can be either in an encoded -/// form or decoded form. The encoded form are raw bytes and aren't validated -/// until they're decoded. The decoded form is a structured form of the stream. -/// -/// The decoded form is more ergonomic to work with but the encoded form is -/// more efficient to read/write. -pub const Stream = union(enum) { - encoded: EncodedStream, -}; - -/// An encoded stream value. It is "encoded" in the sense that it is raw bytes -/// with a type associated. The raw bytes are not validated to be correct for -/// the type. -pub const EncodedStream = struct { - type: u32, - data: []const u8, -}; - -test { - @import("std").testing.refAllDecls(@This()); -} diff --git a/src/crash/minidump/stream_threadlist.zig b/src/crash/minidump/stream_threadlist.zig deleted file mode 100644 index 51f3f9d4c..000000000 --- a/src/crash/minidump/stream_threadlist.zig +++ /dev/null @@ -1,117 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; -const external = @import("external.zig"); -const readerpkg = @import("reader.zig"); -const Reader = readerpkg.Reader; -const ReadError = readerpkg.ReadError; - -const log = std.log.scoped(.minidump_stream); - -/// This is the list of threads from the process. -/// -/// This is the Reader implementation. You usually do not use this directly. -/// Instead, use Reader(T).ThreadList which will get you the same thing. -/// -/// ThreadList is stream type 0x3. -/// StreamReader is the Reader(T).StreamReader type. -pub fn ThreadListReader(comptime R: type) type { - return struct { - const Self = @This(); - - /// The number of threads in the list. - count: u32, - - /// The rva to the first thread in the list. - rva: u32, - - /// Source data and endianness so we can read. - source: R.Source, - endian: std.builtin.Endian, - - pub fn init(r: *R.StreamReader) !Self { - assert(r.directory.stream_type == 0x3); - try r.seekToPayload(); - const reader = r.source.reader(); - - // Our count is always a u32 in the header. - const count = try reader.readInt(u32, r.endian); - - // Determine if we have padding in our header. It is possible - // for there to be padding if the list header was written by - // a 32-bit process but is being read on a 64-bit process. - const padding = padding: { - const maybe_size = @sizeOf(u32) + (@sizeOf(external.Thread) * count); - switch (std.math.order(maybe_size, r.directory.location.data_size)) { - // It should never be larger than what the directory says. - .gt => return ReadError.StreamSizeMismatch, - - // If the sizes match exactly we're good. - .eq => break :padding 0, - - .lt => { - const padding = r.directory.location.data_size - maybe_size; - if (padding != 4) return ReadError.StreamSizeMismatch; - break :padding padding; - }, - } - }; - - // Rva is the location of the first thread in the list. - const rva = r.directory.location.rva + @as(u32, @sizeOf(u32)) + padding; - - return .{ - .count = count, - .rva = rva, - .source = r.source, - .endian = r.endian, - }; - } - - /// Get the thread entry for the given index. - /// - /// Index is asserted to be less than count. - pub fn thread(self: *const Self, i: usize) !external.Thread { - assert(i < self.count); - - // Seek to the thread - const offset: u32 = @intCast(@sizeOf(external.Thread) * i); - const rva: u32 = self.rva + offset; - try self.source.seekableStream().seekTo(rva); - - // Read the thread - return try self.source.reader().readStructEndian( - external.Thread, - self.endian, - ); - } - }; -} - -test "minidump: threadlist" { - const testing = std.testing; - const alloc = testing.allocator; - - var fbs = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp")); - const R = Reader(*@TypeOf(fbs)); - const r = try R.init(&fbs); - - // Get our thread list stream - const dir = try r.directory(0); - try testing.expectEqual(3, dir.stream_type); - var sr = try r.streamReader(dir); - - // Get our rich structure - const v = try R.ThreadList.init(&sr); - log.warn("threadlist count={} rva={}", .{ v.count, v.rva }); - - try testing.expectEqual(12, v.count); - for (0..v.count) |i| { - const t = try v.thread(i); - log.warn("thread i={} thread={}", .{ i, t }); - - // Read our stack memory - var stack_reader = try r.locationReader(t.stack.memory); - const bytes = try stack_reader.reader().readAllAlloc(alloc, t.stack.memory.data_size); - defer alloc.free(bytes); - } -}