crash: remove minidump parser (#9000)

We never used it because our minidump files on Linux didn't contain
meaningful information. With Zig's Writergate, let's drop this and
rewrite it later; we can always resurrect it from the git history.

Rejoice @pluiedev
pull/9001/head
Mitchell Hashimoto 2025-10-02 11:23:45 -07:00 committed by GitHub
commit 16dab3b8dc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 0 additions and 456 deletions

View File

@ -5,7 +5,6 @@
const dir = @import("dir.zig");
const sentry_envelope = @import("sentry_envelope.zig");
pub const minidump = @import("minidump.zig");
pub const sentry = @import("sentry.zig");
pub const Envelope = sentry_envelope.Envelope;
pub const defaultDir = dir.defaultDir;

View File

@ -1,7 +0,0 @@
pub const reader = @import("minidump/reader.zig");
pub const stream = @import("minidump/stream.zig");
pub const Reader = reader.Reader;
test {
    // Reference every declaration of this file so that the tests living in
    // the imported minidump files are pulled in as well.
    const testing = @import("std").testing;
    testing.refAllDecls(@This());
}

View File

@ -1,59 +0,0 @@
//! This file contains the external structs and constants for the minidump
//! format. Most are from the Microsoft documentation on the minidump format:
//! https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/
//!
//! Wherever possible, we also compare our definitions to other projects
//! such as rust-minidump, libmdmp, breakpad, etc. to ensure we're doing
//! the right thing.
/// "MDMP" in little-endian.
pub const signature = 0x504D444D;
/// The version of the minidump format.
pub const version = 0xA793;
/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_header
///
/// The fixed-layout record at the start of a minidump. The reader decodes it
/// with `readStructEndian`, once as little-endian and, if the signature does
/// not match, once more as big-endian.
pub const Header = extern struct {
/// Must equal `signature`; this is what the reader uses to validate the
/// file and to detect its byte order.
signature: u32,
/// `low` must equal `version`. `high` is an implementation-specific value
/// that the reader does not inspect.
version: packed struct(u32) { low: u16, high: u16 },
/// Number of entries in the stream directory.
stream_count: u32,
/// Offset of the first `Directory` entry; the reader seeks here (plus
/// `@sizeOf(Directory) * index`) to load an entry.
stream_directory_rva: u32,
/// The remaining fields are decoded but not otherwise used by the reader;
/// see the linked documentation for their meaning.
checksum: u32,
time_date_stamp: u32,
flags: u64,
};
/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_directory
///
/// One entry of the stream directory: a stream type tag plus the location
/// of that stream's bytes in the file.
pub const Directory = extern struct {
/// Numeric stream type. The thread list reader expects 0x3 here.
stream_type: u32,
/// Where the stream's payload lives (offset and size).
location: LocationDescriptor,
};
/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_location_descriptor
///
/// Describes a span of bytes inside the minidump.
pub const LocationDescriptor = extern struct {
/// Length of the span in bytes; the reader uses it as the byte limit of
/// the limited reader it hands out.
data_size: u32,
/// Position of the span; the reader passes it to `seekTo` on the source.
rva: u32,
};
/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_memory_descriptor
///
/// Describes a captured memory range and where its bytes are stored in the
/// minidump.
pub const MemoryDescriptor = extern struct {
/// NOTE(review): presumably the address of the range in the dumped
/// process — confirm against the linked documentation.
start_of_memory_range: u64,
/// Location of the captured bytes within the file.
memory: LocationDescriptor,
};
/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_thread_list
///
/// Mirrors the documented thread list record. The thread list stream reader
/// does not decode this struct directly: it reads the count as a bare `u32`
/// and then reads `Thread` entries by offset.
///
/// NOTE(review): as an `extern struct`, the `u64` inside `Thread` may cause
/// padding to be inserted after `number_of_threads` on some targets, so
/// `@sizeOf(ThreadList)` should not be assumed to match the on-disk layout —
/// confirm before using it for reads.
pub const ThreadList = extern struct {
/// Number of `Thread` entries that follow.
number_of_threads: u32,
/// Declared with a single element; the actual number of entries is given
/// by `number_of_threads`.
threads: [1]Thread,
};
/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_thread
///
/// One entry of the thread list stream. The thread list reader decodes
/// entries of this type with `readStructEndian`.
pub const Thread = extern struct {
thread_id: u32,
suspend_count: u32,
priority_class: u32,
priority: u32,
/// NOTE(review): presumably the address of the thread environment block —
/// confirm against the linked documentation.
teb: u64,
/// Describes the thread's stack; `stack.memory` locates the captured stack
/// bytes inside the file.
stack: MemoryDescriptor,
/// Location descriptor for the thread's context record. The contents are
/// not decoded by this package.
thread_context: LocationDescriptor,
};

View File

@ -1,242 +0,0 @@
const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const external = @import("external.zig");
const stream = @import("stream.zig");
const EncodedStream = stream.EncodedStream;
const log = std.log.scoped(.minidump_reader);
/// Possible minidump-specific errors that can occur when reading a minidump.
/// This isn't the full error set since IO errors can also occur depending
/// on the Source type.
pub const ReadError = error{
/// The header signature did not match when decoded as little-endian nor
/// when decoded as big-endian.
InvalidHeader,
/// The low-order word of the header version did not equal the expected
/// minidump version.
InvalidVersion,
/// The size recorded in a stream's directory entry is inconsistent with
/// the stream's contents (raised by the thread list reader).
StreamSizeMismatch,
};
/// Reader creates a new minidump reader for the given source type. The
/// source must have both a "reader()" and "seekableStream()" function.
///
/// Given the format of a minidump file, we must keep the source open and
/// continually access it because the format of the minidump is full of
/// pointers and offsets that we must follow depending on the stream types.
/// Also, since we're not aware of all stream types (in fact it's impossible
/// to be aware since custom stream types are allowed), it's possible any
/// stream type can define its own pointers and offsets. So, the source must
/// always be available so callers can decode the streams as needed.
pub fn Reader(comptime S: type) type {
    return struct {
        const Self = @This();

        /// The source data.
        source: Source,

        /// The endianness of the minidump file. This is detected by reading
        /// the byte order of the header.
        endian: std.builtin.Endian,

        /// The number of streams within the minidump file. This is read from
        /// the header and stored here so we can quickly access them. Note
        /// the stream types require reading the source; this is an optimization
        /// to avoid any allocations on the reader and the caller can choose
        /// to store them if they want.
        stream_count: u32,

        /// Offset of the first stream directory entry, as read from the
        /// header.
        stream_directory_rva: u32,

        /// The type that actually declares `reader` and `seekableStream`:
        /// the pointee when Source is a pointer, otherwise Source itself.
        const SourceCallable = switch (@typeInfo(Source)) {
            .pointer => |v| v.child,
            .@"struct" => Source,
            else => @compileError("Source type must be a pointer or struct"),
        };

        const SourceReader = @typeInfo(@TypeOf(SourceCallable.reader)).@"fn".return_type.?;
        const SourceSeeker = @typeInfo(@TypeOf(SourceCallable.seekableStream)).@"fn".return_type.?;

        /// A limited reader for reading data from the source.
        pub const LimitedReader = std.io.LimitedReader(SourceReader);

        /// The source type for the reader.
        pub const Source = S;

        /// The stream types for reading
        pub const ThreadList = stream.thread_list.ThreadListReader(Self);

        /// The reader type for stream reading. This has some other methods so
        /// you must still call reader() on the result to get the actual
        /// reader to read the data.
        pub const StreamReader = struct {
            source: Source,
            endian: std.builtin.Endian,
            directory: external.Directory,

            /// Should not be accessed directly. This is setup whenever
            /// reader() is called.
            limit_reader: LimitedReader = undefined,

            pub const Reader = LimitedReader.Reader;

            /// Returns a Reader implementation that reads the bytes of the
            /// stream.
            ///
            /// The reader is dependent on the state of Source so any
            /// state-changing operations on Source will invalidate the
            /// reader. For example, making another reader, reading another
            /// stream directory, closing the source, etc.
            ///
            /// NOTE(review): this uses `try` although the return type is
            /// not an error union, so it presumably only compiles for
            /// sources whose seek cannot fail. Changing the return type
            /// would break callers, so it is left as-is — confirm.
            pub fn reader(self: *StreamReader) LimitedReader.Reader {
                try self.source.seekableStream().seekTo(self.directory.location.rva);
                self.limit_reader = .{
                    .inner_reader = self.source.reader(),
                    .bytes_left = self.directory.location.data_size,
                };
                return self.limit_reader.reader();
            }

            /// Seeks the source to the location of the directory.
            pub fn seekToPayload(self: *StreamReader) !void {
                try self.source.seekableStream().seekTo(self.directory.location.rva);
            }
        };

        /// Iterator type to read over the streams in the minidump file.
        pub const StreamIterator = struct {
            reader: *const Self,
            i: u32 = 0,

            /// Returns a StreamReader for the next directory entry, or null
            /// once `stream_count` entries have been yielded.
            pub fn next(self: *StreamIterator) !?StreamReader {
                if (self.i >= self.reader.stream_count) return null;
                const dir = try self.reader.directory(self.i);
                self.i += 1;
                return try self.reader.streamReader(dir);
            }
        };

        /// Initialize a reader. The source must remain available for the entire
        /// lifetime of the reader. The reader does not take ownership of the
        /// source so if it has resources that need to be cleaned up, the caller
        /// must do so once the reader is no longer needed.
        pub fn init(source: Source) !Self {
            const header, const endian = try readHeader(Source, source);
            return .{
                .source = source,
                .endian = endian,
                .stream_count = header.stream_count,
                .stream_directory_rva = header.stream_directory_rva,
            };
        }

        /// Return an iterator to read over the streams in the minidump file.
        /// This is very similar to using a simple for loop to stream_count
        /// and calling directory() on each index, but is more idiomatic
        /// Zig.
        pub fn streamIterator(self: *const Self) StreamIterator {
            return .{ .reader = self };
        }

        /// Return a StreamReader for the given directory type. This streams
        /// from the underlying source so the returned reader is only valid
        /// as long as the source is unmodified (i.e. the source is not
        /// closed, the source seek position is not moved, etc.).
        pub fn streamReader(
            self: *const Self,
            dir: external.Directory,
        ) SourceSeeker.SeekError!StreamReader {
            return .{
                .source = self.source,
                .endian = self.endian,
                .directory = dir,
            };
        }

        /// Get the directory entry with the given index.
        ///
        /// Asserts the index is valid (idx < stream_count).
        pub fn directory(self: *const Self, idx: usize) !external.Directory {
            assert(idx < self.stream_count);

            // Both the directory offset and the stream count come from the
            // file, so do the position math in u64: a u32 product or sum
            // could overflow on a malformed dump. With idx < stream_count
            // (a u32) none of these operations can overflow a u64.
            const index: u64 = idx;
            const offset: u64 = @sizeOf(external.Directory) * index;
            const rva: u64 = @as(u64, self.stream_directory_rva) + offset;

            // Seek to the directory.
            try self.source.seekableStream().seekTo(rva);

            // Read the directory.
            return try self.source.reader().readStructEndian(
                external.Directory,
                self.endian,
            );
        }

        /// Return a reader for the given location descriptor. This is only
        /// valid until the reader source is modified in some way.
        pub fn locationReader(
            self: *const Self,
            loc: external.LocationDescriptor,
        ) !LimitedReader {
            try self.source.seekableStream().seekTo(loc.rva);
            return .{
                .inner_reader = self.source.reader(),
                .bytes_left = loc.data_size,
            };
        }
    };
}
/// Decodes the minidump header from `source` and reports which byte order
/// produced a valid signature.
///
/// Little-endian is tried first, reading from the source's current
/// position. If the signature does not match, the source is rewound to
/// offset 0 and the header is reread as big-endian.
///
/// Returns `ReadError.InvalidHeader` when neither byte order yields the
/// expected signature and `ReadError.InvalidVersion` when the low word of
/// the version is not the expected minidump version. Errors from the
/// source's reader and seeker are passed through.
fn readHeader(comptime T: type, source: T) !struct {
    external.Header,
    std.builtin.Endian,
} {
    const candidates = [_]std.builtin.Endian{ .little, .big };
    for (candidates, 0..) |endian, attempt| {
        // Only the retry rewinds; the first attempt reads from wherever
        // the source currently is.
        if (attempt != 0) try source.seekableStream().seekTo(0);

        const header = try source.reader().readStructEndian(external.Header, endian);
        if (header.signature != external.signature) continue;

        // "The low-order word is MINIDUMP_VERSION. The high-order word is an
        // internal value that is implementation specific."
        if (header.version.low != external.version) return ReadError.InvalidVersion;

        return .{ header, endian };
    }

    // Neither byte order produced the expected signature.
    return ReadError.InvalidHeader;
}
// Debug aid: logs every stream directory entry of the macOS test dump.
// NOTE(review): the earlier wording said "Uncomment to dump ...", but this
// test is active as written.
test "minidump debug" {
var fbs = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp"));
const r = try Reader(*@TypeOf(fbs)).init(&fbs);
var it = r.streamIterator();
while (try it.next()) |s| {
// `it.i` was already advanced by next(), hence the `- 1`.
log.warn("directory i={} dir={}", .{ it.i - 1, s.directory });
}
}
test "minidump read" {
    const testing = std.testing;

    var source = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp"));
    const minidump = try Reader(*@TypeOf(source)).init(&source);

    // Header-level facts about the fixture.
    try testing.expectEqual(std.builtin.Endian.little, minidump.endian);
    try testing.expectEqual(7, minidump.stream_count);

    // The first directory entry has stream type 3 and a 584 byte payload.
    const first = try minidump.directory(0);
    try testing.expectEqual(3, first.stream_type);
    try testing.expectEqual(584, first.location.data_size);

    // Draining the stream reader yields exactly that many bytes.
    var payload = std.ArrayList(u8).init(testing.allocator);
    defer payload.deinit();
    var stream_reader = try minidump.streamReader(first);
    try stream_reader.reader().readAllArrayList(&payload, std.math.maxInt(usize));
    try testing.expectEqual(584, payload.items.len);
}

View File

@ -1,30 +0,0 @@
const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const log = std.log.scoped(.minidump_stream);
/// The known stream types.
pub const thread_list = @import("stream_threadlist.zig");
/// A stream within the minidump file. A stream can be either in an encoded
/// form or decoded form. The encoded form are raw bytes and aren't validated
/// until they're decoded. The decoded form is a structured form of the stream.
///
/// The decoded form is more ergonomic to work with but the encoded form is
/// more efficient to read/write.
///
/// NOTE(review): only the `encoded` variant is defined here; no decoded
/// variants exist in this union as written.
pub const Stream = union(enum) {
/// Raw, unvalidated stream bytes together with their type tag.
encoded: EncodedStream,
};
/// An encoded stream value. It is "encoded" in the sense that it is raw bytes
/// with a type associated. The raw bytes are not validated to be correct for
/// the type.
pub const EncodedStream = struct {
/// The stream type tag.
/// NOTE(review): presumably the same numbering as the directory entry's
/// stream type — confirm.
type: u32,
/// The raw stream bytes. This struct only holds a read-only slice.
data: []const u8,
};
test {
    // Reference every declaration of this file so that tests in the
    // imported stream implementations are pulled in as well.
    const testing = @import("std").testing;
    testing.refAllDecls(@This());
}

View File

@ -1,117 +0,0 @@
const std = @import("std");
const assert = std.debug.assert;
const external = @import("external.zig");
const readerpkg = @import("reader.zig");
const Reader = readerpkg.Reader;
const ReadError = readerpkg.ReadError;
const log = std.log.scoped(.minidump_stream);
/// This is the list of threads from the process.
///
/// This is the Reader implementation. You usually do not use this directly.
/// Instead, use Reader(T).ThreadList which will get you the same thing.
///
/// ThreadList is stream type 0x3.
/// StreamReader is the Reader(T).StreamReader type.
pub fn ThreadListReader(comptime R: type) type {
    return struct {
        const Self = @This();

        /// The number of threads in the list.
        count: u32,

        /// The rva to the first thread in the list.
        rva: u32,

        /// Source data and endianness so we can read.
        source: R.Source,
        endian: std.builtin.Endian,

        /// Decode the thread list header from the given stream reader.
        ///
        /// Asserts that the stream's directory entry has type 0x3. Returns
        /// `ReadError.StreamSizeMismatch` if the size recorded in the
        /// directory entry is inconsistent with the thread count (other
        /// than the tolerated 4 bytes of padding), or if the position of
        /// the first thread cannot be represented as a `u32`. Errors from
        /// the source are passed through.
        pub fn init(r: *R.StreamReader) !Self {
            assert(r.directory.stream_type == 0x3);
            try r.seekToPayload();
            const reader = r.source.reader();

            // Our count is always a u32 in the header.
            const count = try reader.readInt(u32, r.endian);

            // The count comes straight from the file, so compute the
            // expected size in u64: the u32 product could overflow on a
            // malformed dump.
            const entries_size: u64 = @as(u64, @sizeOf(external.Thread)) * count;
            const expected_size: u64 = @sizeOf(u32) + entries_size;
            const actual_size: u64 = r.directory.location.data_size;

            // Determine if we have padding in our header. It is possible
            // for there to be padding if the list header was written by
            // a 32-bit process but is being read on a 64-bit process.
            const padding: u32 = switch (std.math.order(expected_size, actual_size)) {
                // It should never be larger than what the directory says.
                .gt => return ReadError.StreamSizeMismatch,

                // If the sizes match exactly we're good.
                .eq => 0,

                // The only slack we accept is exactly 4 bytes of padding.
                .lt => if (actual_size - expected_size == 4)
                    4
                else
                    return ReadError.StreamSizeMismatch,
            };

            // Rva is the location of the first thread in the list. It is
            // computed in u64 and rejected if it does not fit the u32
            // field rather than overflowing.
            const first_thread: u64 = @as(u64, r.directory.location.rva) + @sizeOf(u32) + padding;
            const rva = std.math.cast(u32, first_thread) orelse
                return ReadError.StreamSizeMismatch;

            return .{
                .count = count,
                .rva = rva,
                .source = r.source,
                .endian = r.endian,
            };
        }

        /// Get the thread entry for the given index.
        ///
        /// Index is asserted to be less than count.
        pub fn thread(self: *const Self, i: usize) !external.Thread {
            assert(i < self.count);

            // Seek to the thread. The position is computed in u64 so that
            // adding the entry offset to the base rva cannot overflow.
            const index: u64 = i;
            const offset: u64 = @sizeOf(external.Thread) * index;
            const rva: u64 = @as(u64, self.rva) + offset;
            try self.source.seekableStream().seekTo(rva);

            // Read the thread
            return try self.source.reader().readStructEndian(
                external.Thread,
                self.endian,
            );
        }
    };
}
test "minidump: threadlist" {
    const testing = std.testing;

    var source = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp"));
    const Minidump = Reader(*@TypeOf(source));
    const minidump = try Minidump.init(&source);

    // The thread list is the first directory entry in this fixture.
    const entry = try minidump.directory(0);
    try testing.expectEqual(3, entry.stream_type);
    var stream_reader = try minidump.streamReader(entry);

    // Decode the stream into the structured thread list reader.
    const list = try Minidump.ThreadList.init(&stream_reader);
    log.warn("threadlist count={} rva={}", .{ list.count, list.rva });
    try testing.expectEqual(12, list.count);

    var index: usize = 0;
    while (index < list.count) : (index += 1) {
        const info = try list.thread(index);
        log.warn("thread i={} thread={}", .{ index, info });

        // Read the thread's stack memory through a location reader.
        var stack = try minidump.locationReader(info.stack.memory);
        const contents = try stack.reader().readAllAlloc(testing.allocator, info.stack.memory.data_size);
        defer testing.allocator.free(contents);
    }
}