From 188caf42a11cc3e92eb0bc31dc86c6c49b6fa011 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 12 Nov 2025 09:57:51 -0800 Subject: [PATCH 01/10] search: move PageListSearch to a dedicated file --- src/terminal/search.zig | 884 +----------------------------- src/terminal/search/pagelist.zig | 885 +++++++++++++++++++++++++++++++ 2 files changed, 888 insertions(+), 881 deletions(-) create mode 100644 src/terminal/search/pagelist.zig diff --git a/src/terminal/search.zig b/src/terminal/search.zig index 932ab5a35..a043973ff 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -1,885 +1,7 @@ //! Search functionality for the terminal. -//! -//! At the time of writing this comment, this is a **work in progress**. -//! -//! Search at the time of writing is implemented using a simple -//! boyer-moore-horspool algorithm. The suboptimal part of the implementation -//! is that we need to encode each terminal page into a text buffer in order -//! to apply BMH to it. This is because the terminal page is not laid out -//! in a flat text form. -//! -//! To minimize memory usage, we use a sliding window to search for the -//! needle. The sliding window only keeps the minimum amount of page data -//! in memory to search for a needle (i.e. `needle.len - 1` bytes of overlap -//! between terminal pages). -//! -//! Future work: -//! -//! - PageListSearch on a PageList concurrently with another thread -//! - Handle pruned pages in a PageList to ensure we don't keep references -//! - Repeat search a changing active area of the screen -//! - Reverse search so that more recent matches are found first -//! 
-const std = @import("std"); -const Allocator = std.mem.Allocator; -const assert = std.debug.assert; -const CircBuf = @import("../datastruct/main.zig").CircBuf; -const terminal = @import("main.zig"); -const point = terminal.point; -const Page = terminal.Page; -const PageList = terminal.PageList; -const Pin = PageList.Pin; -const Selection = terminal.Selection; -const Screen = terminal.Screen; -const PageFormatter = @import("formatter.zig").PageFormatter; +pub const PageList = @import("search/pagelist.zig").PageListSearch; -/// Searches for a term in a PageList structure. -/// -/// At the time of writing, this does not support searching a pagelist -/// simultaneously as its being used by another thread. This will be resolved -/// in the future. -pub const PageListSearch = struct { - /// The list we're searching. - list: *PageList, - - /// The sliding window of page contents and nodes to search. - window: SlidingWindow, - - /// Initialize the page list search. - /// - /// The needle is not copied and must be kept alive for the duration - /// of the search operation. - pub fn init( - alloc: Allocator, - list: *PageList, - needle: []const u8, - ) Allocator.Error!PageListSearch { - var window = try SlidingWindow.init(alloc, needle); - errdefer window.deinit(); - - return .{ - .list = list, - .window = window, - }; - } - - pub fn deinit(self: *PageListSearch) void { - self.window.deinit(); - } - - /// Find the next match for the needle in the pagelist. This returns - /// null when there are no more matches. - pub fn next(self: *PageListSearch) Allocator.Error!?Selection { - // Try to search for the needle in the window. If we find a match - // then we can return that and we're done. - if (self.window.next()) |sel| return sel; - - // Get our next node. If we have a value in our window then we - // can determine the next node. If we don't, we've never setup the - // window so we use our first node. 
- var node_: ?*PageList.List.Node = if (self.window.meta.last()) |meta| - meta.node.next - else - self.list.pages.first; - - // Add one pagelist node at a time, look for matches, and repeat - // until we find a match or we reach the end of the pagelist. - // This append then next pattern limits memory usage of the window. - while (node_) |node| : (node_ = node.next) { - try self.window.append(node); - if (self.window.next()) |sel| return sel; - } - - // We've reached the end of the pagelist, no matches. - return null; - } -}; - -/// Searches page nodes via a sliding window. The sliding window maintains -/// the invariant that data isn't pruned until (1) we've searched it and -/// (2) we've accounted for overlaps across pages to fit the needle. -/// -/// The sliding window is first initialized empty. Pages are then appended -/// in the order to search them. If you're doing a reverse search then the -/// pages should be appended in reverse order and the needle should be -/// reversed. -/// -/// All appends grow the window. The window is only pruned when a searc -/// is done (positive or negative match) via `next()`. -/// -/// To avoid unnecessary memory growth, the recommended usage is to -/// call `next()` until it returns null and then `append` the next page -/// and repeat the process. This will always maintain the minimum -/// required memory to search for the needle. -const SlidingWindow = struct { - /// The allocator to use for all the data within this window. We - /// store this rather than passing it around because its already - /// part of multiple elements (eg. Meta's CellMap) and we want to - /// ensure we always use a consistent allocator. Additionally, only - /// a small amount of sliding windows are expected to be in use - /// at any one time so the memory overhead isn't that large. - alloc: Allocator, - - /// The data buffer is a circular buffer of u8 that contains the - /// encoded page text that we can use to search for the needle. 
- data: DataBuf, - - /// The meta buffer is a circular buffer that contains the metadata - /// about the pages we're searching. This usually isn't that large - /// so callers must iterate through it to find the offset to map - /// data to meta. - meta: MetaBuf, - - /// Offset into data for our current state. This handles the - /// situation where our search moved through meta[0] but didn't - /// do enough to prune it. - data_offset: usize = 0, - - /// The needle we're searching for. Does not own the memory. - needle: []const u8, - - /// A buffer to store the overlap search data. This is used to search - /// overlaps between pages where the match starts on one page and - /// ends on another. The length is always `needle.len * 2`. - overlap_buf: []u8, - - const DataBuf = CircBuf(u8, 0); - const MetaBuf = CircBuf(Meta, undefined); - const Meta = struct { - node: *PageList.List.Node, - cell_map: std.ArrayList(point.Coordinate), - - pub fn deinit(self: *Meta, alloc: Allocator) void { - self.cell_map.deinit(alloc); - } - }; - - pub fn init( - alloc: Allocator, - needle: []const u8, - ) Allocator.Error!SlidingWindow { - var data = try DataBuf.init(alloc, 0); - errdefer data.deinit(alloc); - - var meta = try MetaBuf.init(alloc, 0); - errdefer meta.deinit(alloc); - - const overlap_buf = try alloc.alloc(u8, needle.len * 2); - errdefer alloc.free(overlap_buf); - - return .{ - .alloc = alloc, - .data = data, - .meta = meta, - .needle = needle, - .overlap_buf = overlap_buf, - }; - } - - pub fn deinit(self: *SlidingWindow) void { - self.alloc.free(self.overlap_buf); - self.data.deinit(self.alloc); - - var meta_it = self.meta.iterator(.forward); - while (meta_it.next()) |meta| meta.deinit(self.alloc); - self.meta.deinit(self.alloc); - } - - /// Clear all data but retain allocated capacity. 
- pub fn clearAndRetainCapacity(self: *SlidingWindow) void { - var meta_it = self.meta.iterator(.forward); - while (meta_it.next()) |meta| meta.deinit(self.alloc); - self.meta.clear(); - self.data.clear(); - self.data_offset = 0; - } - - /// Search the window for the next occurrence of the needle. As - /// the window moves, the window will prune itself while maintaining - /// the invariant that the window is always big enough to contain - /// the needle. - pub fn next(self: *SlidingWindow) ?Selection { - const slices = slices: { - // If we have less data then the needle then we can't possibly match - const data_len = self.data.len(); - if (data_len < self.needle.len) return null; - - break :slices self.data.getPtrSlice( - self.data_offset, - data_len - self.data_offset, - ); - }; - - // Search the first slice for the needle. - if (std.mem.indexOf(u8, slices[0], self.needle)) |idx| { - return self.selection( - idx, - self.needle.len, - ); - } - - // Search the overlap buffer for the needle. - if (slices[0].len > 0 and slices[1].len > 0) overlap: { - // Get up to needle.len - 1 bytes from each side (as much as - // we can) and store it in the overlap buffer. - const prefix: []const u8 = prefix: { - const len = @min(slices[0].len, self.needle.len - 1); - const idx = slices[0].len - len; - break :prefix slices[0][idx..]; - }; - const suffix: []const u8 = suffix: { - const len = @min(slices[1].len, self.needle.len - 1); - break :suffix slices[1][0..len]; - }; - const overlap_len = prefix.len + suffix.len; - assert(overlap_len <= self.overlap_buf.len); - @memcpy(self.overlap_buf[0..prefix.len], prefix); - @memcpy(self.overlap_buf[prefix.len..overlap_len], suffix); - - // Search the overlap - const idx = std.mem.indexOf( - u8, - self.overlap_buf[0..overlap_len], - self.needle, - ) orelse break :overlap; - - // We found a match in the overlap buffer. We need to map the - // index back to the data buffer in order to get our selection. 
- return self.selection( - slices[0].len - prefix.len + idx, - self.needle.len, - ); - } - - // Search the last slice for the needle. - if (std.mem.indexOf(u8, slices[1], self.needle)) |idx| { - return self.selection( - slices[0].len + idx, - self.needle.len, - ); - } - - // No match. We keep `needle.len - 1` bytes available to - // handle the future overlap case. - var meta_it = self.meta.iterator(.reverse); - prune: { - var saved: usize = 0; - while (meta_it.next()) |meta| { - const needed = self.needle.len - 1 - saved; - if (meta.cell_map.items.len >= needed) { - // We save up to this meta. We set our data offset - // to exactly where it needs to be to continue - // searching. - self.data_offset = meta.cell_map.items.len - needed; - break; - } - - saved += meta.cell_map.items.len; - } else { - // If we exited the while loop naturally then we - // never got the amount we needed and so there is - // nothing to prune. - assert(saved < self.needle.len - 1); - break :prune; - } - - const prune_count = self.meta.len() - meta_it.idx; - if (prune_count == 0) { - // This can happen if we need to save up to the first - // meta value to retain our window. - break :prune; - } - - // We can now delete all the metas up to but NOT including - // the meta we found through meta_it. - meta_it = self.meta.iterator(.forward); - var prune_data_len: usize = 0; - for (0..prune_count) |_| { - const meta = meta_it.next().?; - prune_data_len += meta.cell_map.items.len; - meta.deinit(self.alloc); - } - self.meta.deleteOldest(prune_count); - self.data.deleteOldest(prune_data_len); - } - - // Our data offset now moves to needle.len - 1 from the end so - // that we can handle the overlap case. - self.data_offset = self.data.len() - self.needle.len + 1; - - self.assertIntegrity(); - return null; - } - - /// Return a selection for the given start and length into the data - /// buffer and also prune the data/meta buffers if possible up to - /// this start index. 
- /// - /// The start index is assumed to be relative to the offset. i.e. - /// index zero is actually at `self.data[self.data_offset]`. The - /// selection will account for the offset. - fn selection( - self: *SlidingWindow, - start_offset: usize, - len: usize, - ) Selection { - const start = start_offset + self.data_offset; - assert(start < self.data.len()); - assert(start + len <= self.data.len()); - - // meta_consumed is the number of bytes we've consumed in the - // data buffer up to and NOT including the meta where we've - // found our pin. This is important because it tells us the - // amount of data we can safely deleted from self.data since - // we can't partially delete a meta block's data. (The partial - // amount is represented by self.data_offset). - var meta_it = self.meta.iterator(.forward); - var meta_consumed: usize = 0; - const tl: Pin = pin(&meta_it, &meta_consumed, start); - - // Store the information required to prune later. We store this - // now because we only want to prune up to our START so we can - // find overlapping matches. - const tl_meta_idx = meta_it.idx - 1; - const tl_meta_consumed = meta_consumed; - - // We have to seek back so that we reinspect our current - // iterator value again in case the start and end are in the - // same segment. - meta_it.seekBy(-1); - const br: Pin = pin(&meta_it, &meta_consumed, start + len - 1); - assert(meta_it.idx >= 1); - - // Our offset into the current meta block is the start index - // minus the amount of data fully consumed. We then add one - // to move one past the match so we don't repeat it. - self.data_offset = start - tl_meta_consumed + 1; - - // meta_it.idx is br's meta index plus one (because the iterator - // moves one past the end; we call next() one last time). So - // we compare against one to check that the meta that we matched - // in has prior meta blocks we can prune. - if (tl_meta_idx > 0) { - // Deinit all our memory in the meta blocks prior to our - // match. 
- const meta_count = tl_meta_idx; - meta_it.reset(); - for (0..meta_count) |_| meta_it.next().?.deinit(self.alloc); - if (comptime std.debug.runtime_safety) { - assert(meta_it.idx == meta_count); - assert(meta_it.next().?.node == tl.node); - } - self.meta.deleteOldest(meta_count); - - // Delete all the data up to our current index. - assert(tl_meta_consumed > 0); - self.data.deleteOldest(tl_meta_consumed); - } - - self.assertIntegrity(); - return .init(tl, br, false); - } - - /// Convert a data index into a pin. - /// - /// The iterator and offset are both expected to be passed by - /// pointer so that the pin can be efficiently called for multiple - /// indexes (in order). See selection() for an example. - /// - /// Precondition: the index must be within the data buffer. - fn pin( - it: *MetaBuf.Iterator, - offset: *usize, - idx: usize, - ) Pin { - while (it.next()) |meta| { - // meta_i is the index we expect to find the match in the - // cell map within this meta if it contains it. - const meta_i = idx - offset.*; - if (meta_i >= meta.cell_map.items.len) { - // This meta doesn't contain the match. This means we - // can also prune this set of data because we only look - // forward. - offset.* += meta.cell_map.items.len; - continue; - } - - // We found the meta that contains the start of the match. - const map = meta.cell_map.items[meta_i]; - return .{ - .node = meta.node, - .y = @intCast(map.y), - .x = map.x, - }; - } - - // Unreachable because it is a precondition that the index is - // within the data buffer. - unreachable; - } - - /// Add a new node to the sliding window. This will always grow - /// the sliding window; data isn't pruned until it is consumed - /// via a search (via next()). - pub fn append( - self: *SlidingWindow, - node: *PageList.List.Node, - ) Allocator.Error!void { - // Initialize our metadata for the node. 
- var meta: Meta = .{ - .node = node, - .cell_map = .empty, - }; - errdefer meta.deinit(self.alloc); - - // This is suboptimal but we need to encode the page once to - // temporary memory, and then copy it into our circular buffer. - // In the future, we should benchmark and see if we can encode - // directly into the circular buffer. - var encoded: std.Io.Writer.Allocating = .init(self.alloc); - defer encoded.deinit(); - - // Encode the page into the buffer. - const formatter: PageFormatter = formatter: { - var formatter: PageFormatter = .init(&meta.node.data, .plain); - formatter.point_map = .{ - .alloc = self.alloc, - .map = &meta.cell_map, - }; - break :formatter formatter; - }; - formatter.format(&encoded.writer) catch { - // writer uses anyerror but the only realistic error on - // an ArrayList is out of memory. - return error.OutOfMemory; - }; - assert(meta.cell_map.items.len == encoded.written().len); - - // Ensure our buffers are big enough to store what we need. - try self.data.ensureUnusedCapacity(self.alloc, encoded.written().len); - try self.meta.ensureUnusedCapacity(self.alloc, 1); - - // Append our new node to the circular buffer. - try self.data.appendSlice(encoded.written()); - try self.meta.append(meta); - - self.assertIntegrity(); - } - - fn assertIntegrity(self: *const SlidingWindow) void { - if (comptime !std.debug.runtime_safety) return; - - // We don't run integrity checks on Valgrind because its soooooo slow, - // Valgrind is our integrity checker, and we run these during unit - // tests (non-Valgrind) anyways so we're verifying anyways. - if (std.valgrind.runningOnValgrind() > 0) return; - - // Integrity check: verify our data matches our metadata exactly. - var meta_it = self.meta.iterator(.forward); - var data_len: usize = 0; - while (meta_it.next()) |m| data_len += m.cell_map.items.len; - assert(data_len == self.data.len()); - - // Integrity check: verify our data offset is within bounds. 
- assert(self.data_offset < self.data.len()); - } -}; - -test "PageListSearch single page" { - const testing = std.testing; - const alloc = testing.allocator; - - var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("hello. boo! hello. boo!"); - try testing.expect(s.pages.pages.first == s.pages.pages.last); - - var search = try PageListSearch.init(alloc, &s.pages, "boo!"); - defer search.deinit(); - - // We should be able to find two matches. - { - const sel = (try search.next()).?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 7, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 10, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - { - const sel = (try search.next()).?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 19, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 22, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - try testing.expect((try search.next()) == null); - try testing.expect((try search.next()) == null); -} - -test "SlidingWindow empty on init" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "boo!"); - defer w.deinit(); - try testing.expectEqual(0, w.data.len()); - try testing.expectEqual(0, w.meta.len()); -} - -test "SlidingWindow single append" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "boo!"); - defer w.deinit(); - - var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("hello. boo! hello. boo!"); - - // We want to test single-page cases. - try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - - // We should be able to find two matches. 
- { - const sel = w.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 7, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 10, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - { - const sel = w.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 19, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 22, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - try testing.expect(w.next() == null); - try testing.expect(w.next() == null); -} - -test "SlidingWindow single append no match" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "nope!"); - defer w.deinit(); - - var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("hello. boo! hello. boo!"); - - // We want to test single-page cases. - try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - - // No matches - try testing.expect(w.next() == null); - try testing.expect(w.next() == null); - - // Should still keep the page - try testing.expectEqual(1, w.meta.len()); -} - -test "SlidingWindow two pages" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "boo!"); - defer w.deinit(); - - var s = try Screen.init(alloc, 80, 24, 1000); - defer s.deinit(); - - // Fill up the first page. The final bytes in the first page - // are "boo!" 
- const first_page_rows = s.pages.pages.first.?.data.capacity.rows; - for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); - for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); - try s.testWriteString("boo!"); - try testing.expect(s.pages.pages.first == s.pages.pages.last); - try s.testWriteString("\n"); - try testing.expect(s.pages.pages.first != s.pages.pages.last); - try s.testWriteString("hello. boo!"); - - // Add both pages - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node.next.?); - - // Search should find two matches - { - const sel = w.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 76, - .y = 22, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 79, - .y = 22, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - { - const sel = w.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 7, - .y = 23, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 10, - .y = 23, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - try testing.expect(w.next() == null); - try testing.expect(w.next() == null); -} - -test "SlidingWindow two pages match across boundary" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "hello, world"); - defer w.deinit(); - - var s = try Screen.init(alloc, 80, 24, 1000); - defer s.deinit(); - - // Fill up the first page. The final bytes in the first page - // are "boo!" 
- const first_page_rows = s.pages.pages.first.?.data.capacity.rows; - for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); - for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); - try s.testWriteString("hell"); - try testing.expect(s.pages.pages.first == s.pages.pages.last); - try s.testWriteString("o, world!"); - try testing.expect(s.pages.pages.first != s.pages.pages.last); - - // Add both pages - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node.next.?); - - // Search should find a match - { - const sel = w.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 76, - .y = 22, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 7, - .y = 23, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - try testing.expect(w.next() == null); - try testing.expect(w.next() == null); - - // We shouldn't prune because we don't have enough space - try testing.expectEqual(2, w.meta.len()); -} - -test "SlidingWindow two pages no match prunes first page" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "nope!"); - defer w.deinit(); - - var s = try Screen.init(alloc, 80, 24, 1000); - defer s.deinit(); - - // Fill up the first page. The final bytes in the first page - // are "boo!" - const first_page_rows = s.pages.pages.first.?.data.capacity.rows; - for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); - for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); - try s.testWriteString("boo!"); - try testing.expect(s.pages.pages.first == s.pages.pages.last); - try s.testWriteString("\n"); - try testing.expect(s.pages.pages.first != s.pages.pages.last); - try s.testWriteString("hello. 
boo!"); - - // Add both pages - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node.next.?); - - // Search should find nothing - try testing.expect(w.next() == null); - try testing.expect(w.next() == null); - - // We should've pruned our page because the second page - // has enough text to contain our needle. - try testing.expectEqual(1, w.meta.len()); -} - -test "SlidingWindow two pages no match keeps both pages" { - const testing = std.testing; - const alloc = testing.allocator; - - var s = try Screen.init(alloc, 80, 24, 1000); - defer s.deinit(); - - // Fill up the first page. The final bytes in the first page - // are "boo!" - const first_page_rows = s.pages.pages.first.?.data.capacity.rows; - for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); - for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); - try s.testWriteString("boo!"); - try testing.expect(s.pages.pages.first == s.pages.pages.last); - try s.testWriteString("\n"); - try testing.expect(s.pages.pages.first != s.pages.pages.last); - try s.testWriteString("hello. boo!"); - - // Imaginary needle for search. Doesn't match! - var needle_list: std.ArrayList(u8) = .empty; - defer needle_list.deinit(alloc); - try needle_list.appendNTimes(alloc, 'x', first_page_rows * s.pages.cols); - const needle: []const u8 = needle_list.items; - - var w = try SlidingWindow.init(alloc, needle); - defer w.deinit(); - - // Add both pages - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node.next.?); - - // Search should find nothing - try testing.expect(w.next() == null); - try testing.expect(w.next() == null); - - // No pruning because both pages are needed to fit needle. 
- try testing.expectEqual(2, w.meta.len()); -} - -test "SlidingWindow single append across circular buffer boundary" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "abc"); - defer w.deinit(); - - var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("XXXXXXXXXXXXXXXXXXXboo!XXXXX"); - - // We are trying to break a circular buffer boundary so the way we - // do this is to duplicate the data then do a failing search. This - // will cause the first page to be pruned. The next time we append we'll - // put it in the middle of the circ buffer. We assert this so that if - // our implementation changes our test will fail. - try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node); - { - // No wrap around yet - const slices = w.data.getPtrSlice(0, w.data.len()); - try testing.expect(slices[0].len > 0); - try testing.expect(slices[1].len == 0); - } - - // Search non-match, prunes page - try testing.expect(w.next() == null); - try testing.expectEqual(1, w.meta.len()); - - // Change the needle, just needs to be the same length (not a real API) - w.needle = "boo"; - - // Add new page, now wraps - try w.append(node); - { - const slices = w.data.getPtrSlice(0, w.data.len()); - try testing.expect(slices[0].len > 0); - try testing.expect(slices[1].len > 0); - } - { - const sel = w.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 19, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 21, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - try testing.expect(w.next() == null); -} - -test "SlidingWindow single append match on boundary" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "abcd"); - defer w.deinit(); - - 
var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("o!XXXXXXXXXXXXXXXXXXXbo"); - - // We are trying to break a circular buffer boundary so the way we - // do this is to duplicate the data then do a failing search. This - // will cause the first page to be pruned. The next time we append we'll - // put it in the middle of the circ buffer. We assert this so that if - // our implementation changes our test will fail. - try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node); - { - // No wrap around yet - const slices = w.data.getPtrSlice(0, w.data.len()); - try testing.expect(slices[0].len > 0); - try testing.expect(slices[1].len == 0); - } - - // Search non-match, prunes page - try testing.expect(w.next() == null); - try testing.expectEqual(1, w.meta.len()); - - // Change the needle, just needs to be the same length (not a real API) - w.needle = "boo!"; - - // Add new page, now wraps - try w.append(node); - { - const slices = w.data.getPtrSlice(0, w.data.len()); - try testing.expect(slices[0].len > 0); - try testing.expect(slices[1].len > 0); - } - { - const sel = w.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 21, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 1, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - try testing.expect(w.next() == null); +test { + @import("std").testing.refAllDecls(@This()); } diff --git a/src/terminal/search/pagelist.zig b/src/terminal/search/pagelist.zig new file mode 100644 index 000000000..336b1dfba --- /dev/null +++ b/src/terminal/search/pagelist.zig @@ -0,0 +1,885 @@ +//! Search functionality for the terminal. +//! +//! At the time of writing this comment, this is a **work in progress**. +//! +//! Search at the time of writing is implemented using a simple +//! 
boyer-moore-horspool algorithm. The suboptimal part of the implementation +//! is that we need to encode each terminal page into a text buffer in order +//! to apply BMH to it. This is because the terminal page is not laid out +//! in a flat text form. +//! +//! To minimize memory usage, we use a sliding window to search for the +//! needle. The sliding window only keeps the minimum amount of page data +//! in memory to search for a needle (i.e. `needle.len - 1` bytes of overlap +//! between terminal pages). +//! +//! Future work: +//! +//! - PageListSearch on a PageList concurrently with another thread +//! - Handle pruned pages in a PageList to ensure we don't keep references +//! - Repeat search a changing active area of the screen +//! - Reverse search so that more recent matches are found first +//! + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const CircBuf = @import("../../datastruct/main.zig").CircBuf; +const terminal = @import("../main.zig"); +const point = terminal.point; +const Page = terminal.Page; +const PageList = terminal.PageList; +const Pin = PageList.Pin; +const Selection = terminal.Selection; +const Screen = terminal.Screen; +const PageFormatter = @import("../formatter.zig").PageFormatter; + +/// Searches for a term in a PageList structure. +/// +/// At the time of writing, this does not support searching a pagelist +/// simultaneously as its being used by another thread. This will be resolved +/// in the future. +pub const PageListSearch = struct { + /// The list we're searching. + list: *PageList, + + /// The sliding window of page contents and nodes to search. + window: SlidingWindow, + + /// Initialize the page list search. + /// + /// The needle is not copied and must be kept alive for the duration + /// of the search operation. 
+ pub fn init( + alloc: Allocator, + list: *PageList, + needle: []const u8, + ) Allocator.Error!PageListSearch { + var window = try SlidingWindow.init(alloc, needle); + errdefer window.deinit(); + + return .{ + .list = list, + .window = window, + }; + } + + pub fn deinit(self: *PageListSearch) void { + self.window.deinit(); + } + + /// Find the next match for the needle in the pagelist. This returns + /// null when there are no more matches. + pub fn next(self: *PageListSearch) Allocator.Error!?Selection { + // Try to search for the needle in the window. If we find a match + // then we can return that and we're done. + if (self.window.next()) |sel| return sel; + + // Get our next node. If we have a value in our window then we + // can determine the next node. If we don't, we've never setup the + // window so we use our first node. + var node_: ?*PageList.List.Node = if (self.window.meta.last()) |meta| + meta.node.next + else + self.list.pages.first; + + // Add one pagelist node at a time, look for matches, and repeat + // until we find a match or we reach the end of the pagelist. + // This append then next pattern limits memory usage of the window. + while (node_) |node| : (node_ = node.next) { + try self.window.append(node); + if (self.window.next()) |sel| return sel; + } + + // We've reached the end of the pagelist, no matches. + return null; + } +}; + +/// Searches page nodes via a sliding window. The sliding window maintains +/// the invariant that data isn't pruned until (1) we've searched it and +/// (2) we've accounted for overlaps across pages to fit the needle. +/// +/// The sliding window is first initialized empty. Pages are then appended +/// in the order to search them. If you're doing a reverse search then the +/// pages should be appended in reverse order and the needle should be +/// reversed. +/// +/// All appends grow the window. The window is only pruned when a search +/// is done (positive or negative match) via `next()`.
+/// +/// To avoid unnecessary memory growth, the recommended usage is to +/// call `next()` until it returns null and then `append` the next page +/// and repeat the process. This will always maintain the minimum +/// required memory to search for the needle. +const SlidingWindow = struct { + /// The allocator to use for all the data within this window. We + /// store this rather than passing it around because its already + /// part of multiple elements (eg. Meta's CellMap) and we want to + /// ensure we always use a consistent allocator. Additionally, only + /// a small amount of sliding windows are expected to be in use + /// at any one time so the memory overhead isn't that large. + alloc: Allocator, + + /// The data buffer is a circular buffer of u8 that contains the + /// encoded page text that we can use to search for the needle. + data: DataBuf, + + /// The meta buffer is a circular buffer that contains the metadata + /// about the pages we're searching. This usually isn't that large + /// so callers must iterate through it to find the offset to map + /// data to meta. + meta: MetaBuf, + + /// Offset into data for our current state. This handles the + /// situation where our search moved through meta[0] but didn't + /// do enough to prune it. + data_offset: usize = 0, + + /// The needle we're searching for. Does not own the memory. + needle: []const u8, + + /// A buffer to store the overlap search data. This is used to search + /// overlaps between pages where the match starts on one page and + /// ends on another. The length is always `needle.len * 2`. 
+ overlap_buf: []u8, + + const DataBuf = CircBuf(u8, 0); + const MetaBuf = CircBuf(Meta, undefined); + const Meta = struct { + node: *PageList.List.Node, + cell_map: std.ArrayList(point.Coordinate), + + pub fn deinit(self: *Meta, alloc: Allocator) void { + self.cell_map.deinit(alloc); + } + }; + + pub fn init( + alloc: Allocator, + needle: []const u8, + ) Allocator.Error!SlidingWindow { + var data = try DataBuf.init(alloc, 0); + errdefer data.deinit(alloc); + + var meta = try MetaBuf.init(alloc, 0); + errdefer meta.deinit(alloc); + + const overlap_buf = try alloc.alloc(u8, needle.len * 2); + errdefer alloc.free(overlap_buf); + + return .{ + .alloc = alloc, + .data = data, + .meta = meta, + .needle = needle, + .overlap_buf = overlap_buf, + }; + } + + pub fn deinit(self: *SlidingWindow) void { + self.alloc.free(self.overlap_buf); + self.data.deinit(self.alloc); + + var meta_it = self.meta.iterator(.forward); + while (meta_it.next()) |meta| meta.deinit(self.alloc); + self.meta.deinit(self.alloc); + } + + /// Clear all data but retain allocated capacity. + pub fn clearAndRetainCapacity(self: *SlidingWindow) void { + var meta_it = self.meta.iterator(.forward); + while (meta_it.next()) |meta| meta.deinit(self.alloc); + self.meta.clear(); + self.data.clear(); + self.data_offset = 0; + } + + /// Search the window for the next occurrence of the needle. As + /// the window moves, the window will prune itself while maintaining + /// the invariant that the window is always big enough to contain + /// the needle. + pub fn next(self: *SlidingWindow) ?Selection { + const slices = slices: { + // If we have less data than the needle then we can't possibly match + const data_len = self.data.len(); + if (data_len < self.needle.len) return null; + + break :slices self.data.getPtrSlice( + self.data_offset, + data_len - self.data_offset, + ); + }; + + // Search the first slice for the needle.
+ if (std.mem.indexOf(u8, slices[0], self.needle)) |idx| { + return self.selection( + idx, + self.needle.len, + ); + } + + // Search the overlap buffer for the needle. + if (slices[0].len > 0 and slices[1].len > 0) overlap: { + // Get up to needle.len - 1 bytes from each side (as much as + // we can) and store it in the overlap buffer. + const prefix: []const u8 = prefix: { + const len = @min(slices[0].len, self.needle.len - 1); + const idx = slices[0].len - len; + break :prefix slices[0][idx..]; + }; + const suffix: []const u8 = suffix: { + const len = @min(slices[1].len, self.needle.len - 1); + break :suffix slices[1][0..len]; + }; + const overlap_len = prefix.len + suffix.len; + assert(overlap_len <= self.overlap_buf.len); + @memcpy(self.overlap_buf[0..prefix.len], prefix); + @memcpy(self.overlap_buf[prefix.len..overlap_len], suffix); + + // Search the overlap + const idx = std.mem.indexOf( + u8, + self.overlap_buf[0..overlap_len], + self.needle, + ) orelse break :overlap; + + // We found a match in the overlap buffer. We need to map the + // index back to the data buffer in order to get our selection. + return self.selection( + slices[0].len - prefix.len + idx, + self.needle.len, + ); + } + + // Search the last slice for the needle. + if (std.mem.indexOf(u8, slices[1], self.needle)) |idx| { + return self.selection( + slices[0].len + idx, + self.needle.len, + ); + } + + // No match. We keep `needle.len - 1` bytes available to + // handle the future overlap case. + var meta_it = self.meta.iterator(.reverse); + prune: { + var saved: usize = 0; + while (meta_it.next()) |meta| { + const needed = self.needle.len - 1 - saved; + if (meta.cell_map.items.len >= needed) { + // We save up to this meta. We set our data offset + // to exactly where it needs to be to continue + // searching. 
+ self.data_offset = meta.cell_map.items.len - needed; + break; + } + + saved += meta.cell_map.items.len; + } else { + // If we exited the while loop naturally then we + // never got the amount we needed and so there is + // nothing to prune. + assert(saved < self.needle.len - 1); + break :prune; + } + + const prune_count = self.meta.len() - meta_it.idx; + if (prune_count == 0) { + // This can happen if we need to save up to the first + // meta value to retain our window. + break :prune; + } + + // We can now delete all the metas up to but NOT including + // the meta we found through meta_it. + meta_it = self.meta.iterator(.forward); + var prune_data_len: usize = 0; + for (0..prune_count) |_| { + const meta = meta_it.next().?; + prune_data_len += meta.cell_map.items.len; + meta.deinit(self.alloc); + } + self.meta.deleteOldest(prune_count); + self.data.deleteOldest(prune_data_len); + } + + // Our data offset now moves to needle.len - 1 from the end so + // that we can handle the overlap case. + self.data_offset = self.data.len() - self.needle.len + 1; + + self.assertIntegrity(); + return null; + } + + /// Return a selection for the given start and length into the data + /// buffer and also prune the data/meta buffers if possible up to + /// this start index. + /// + /// The start index is assumed to be relative to the offset. i.e. + /// index zero is actually at `self.data[self.data_offset]`. The + /// selection will account for the offset. + fn selection( + self: *SlidingWindow, + start_offset: usize, + len: usize, + ) Selection { + const start = start_offset + self.data_offset; + assert(start < self.data.len()); + assert(start + len <= self.data.len()); + + // meta_consumed is the number of bytes we've consumed in the + // data buffer up to and NOT including the meta where we've + // found our pin. This is important because it tells us the + // amount of data we can safely delete from self.data since + // we can't partially delete a meta block's data.
(The partial + // amount is represented by self.data_offset). + var meta_it = self.meta.iterator(.forward); + var meta_consumed: usize = 0; + const tl: Pin = pin(&meta_it, &meta_consumed, start); + + // Store the information required to prune later. We store this + // now because we only want to prune up to our START so we can + // find overlapping matches. + const tl_meta_idx = meta_it.idx - 1; + const tl_meta_consumed = meta_consumed; + + // We have to seek back so that we reinspect our current + // iterator value again in case the start and end are in the + // same segment. + meta_it.seekBy(-1); + const br: Pin = pin(&meta_it, &meta_consumed, start + len - 1); + assert(meta_it.idx >= 1); + + // Our offset into the current meta block is the start index + // minus the amount of data fully consumed. We then add one + // to move one past the match so we don't repeat it. + self.data_offset = start - tl_meta_consumed + 1; + + // tl_meta_idx is the index of the meta block that contains the + // START of the match (captured above as meta_it.idx - 1, right + // after the start pin was found). If it is greater than zero + // then there are prior meta blocks that we can prune. + if (tl_meta_idx > 0) { + // Deinit all our memory in the meta blocks prior to our + // match. + const meta_count = tl_meta_idx; + meta_it.reset(); + for (0..meta_count) |_| meta_it.next().?.deinit(self.alloc); + if (comptime std.debug.runtime_safety) { + assert(meta_it.idx == meta_count); + assert(meta_it.next().?.node == tl.node); + } + self.meta.deleteOldest(meta_count); + + // Delete all the data up to our current index. + assert(tl_meta_consumed > 0); + self.data.deleteOldest(tl_meta_consumed); + } + + self.assertIntegrity(); + return .init(tl, br, false); + } + + /// Convert a data index into a pin. + /// + /// The iterator and offset are both expected to be passed by + /// pointer so that the pin can be efficiently called for multiple + /// indexes (in order). See selection() for an example.
+ /// + /// Precondition: the index must be within the data buffer. + fn pin( + it: *MetaBuf.Iterator, + offset: *usize, + idx: usize, + ) Pin { + while (it.next()) |meta| { + // meta_i is the index we expect to find the match in the + // cell map within this meta if it contains it. + const meta_i = idx - offset.*; + if (meta_i >= meta.cell_map.items.len) { + // This meta doesn't contain the match. This means we + // can also prune this set of data because we only look + // forward. + offset.* += meta.cell_map.items.len; + continue; + } + + // We found the meta that contains the start of the match. + const map = meta.cell_map.items[meta_i]; + return .{ + .node = meta.node, + .y = @intCast(map.y), + .x = map.x, + }; + } + + // Unreachable because it is a precondition that the index is + // within the data buffer. + unreachable; + } + + /// Add a new node to the sliding window. This will always grow + /// the sliding window; data isn't pruned until it is consumed + /// via a search (via next()). + pub fn append( + self: *SlidingWindow, + node: *PageList.List.Node, + ) Allocator.Error!void { + // Initialize our metadata for the node. + var meta: Meta = .{ + .node = node, + .cell_map = .empty, + }; + errdefer meta.deinit(self.alloc); + + // This is suboptimal but we need to encode the page once to + // temporary memory, and then copy it into our circular buffer. + // In the future, we should benchmark and see if we can encode + // directly into the circular buffer. + var encoded: std.Io.Writer.Allocating = .init(self.alloc); + defer encoded.deinit(); + + // Encode the page into the buffer. + const formatter: PageFormatter = formatter: { + var formatter: PageFormatter = .init(&meta.node.data, .plain); + formatter.point_map = .{ + .alloc = self.alloc, + .map = &meta.cell_map, + }; + break :formatter formatter; + }; + formatter.format(&encoded.writer) catch { + // writer uses anyerror but the only realistic error on + // an ArrayList is out of memory. 
+ return error.OutOfMemory; + }; + assert(meta.cell_map.items.len == encoded.written().len); + + // Ensure our buffers are big enough to store what we need. + try self.data.ensureUnusedCapacity(self.alloc, encoded.written().len); + try self.meta.ensureUnusedCapacity(self.alloc, 1); + + // Append our new node to the circular buffer. + try self.data.appendSlice(encoded.written()); + try self.meta.append(meta); + + self.assertIntegrity(); + } + + fn assertIntegrity(self: *const SlidingWindow) void { + if (comptime !std.debug.runtime_safety) return; + + // We don't run integrity checks on Valgrind because its soooooo slow, + // Valgrind is our integrity checker, and we run these during unit + // tests (non-Valgrind) anyways so we're verifying anyways. + if (std.valgrind.runningOnValgrind() > 0) return; + + // Integrity check: verify our data matches our metadata exactly. + var meta_it = self.meta.iterator(.forward); + var data_len: usize = 0; + while (meta_it.next()) |m| data_len += m.cell_map.items.len; + assert(data_len == self.data.len()); + + // Integrity check: verify our data offset is within bounds. + assert(self.data_offset < self.data.len()); + } +}; + +test "PageListSearch single page" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello. boo! hello. boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + + var search = try PageListSearch.init(alloc, &s.pages, "boo!"); + defer search.deinit(); + + // We should be able to find two matches. 
+ { + const sel = (try search.next()).?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 10, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + { + const sel = (try search.next()).?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 19, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 22, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect((try search.next()) == null); + try testing.expect((try search.next()) == null); +} + +test "SlidingWindow empty on init" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "boo!"); + defer w.deinit(); + try testing.expectEqual(0, w.data.len()); + try testing.expectEqual(0, w.meta.len()); +} + +test "SlidingWindow single append" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "boo!"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello. boo! hello. boo!"); + + // We want to test single-page cases. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + + // We should be able to find two matches. 
+ { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 10, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 19, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 22, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); +} + +test "SlidingWindow single append no match" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "nope!"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello. boo! hello. boo!"); + + // We want to test single-page cases. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + + // No matches + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); + + // Should still keep the page + try testing.expectEqual(1, w.meta.len()); +} + +test "SlidingWindow two pages" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "boo!"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "boo!" 
+ const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\n"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + try s.testWriteString("hello. boo!"); + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + try w.append(node.next.?); + + // Search should find two matches + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 76, + .y = 22, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 79, + .y = 22, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 23, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 10, + .y = 23, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); +} + +test "SlidingWindow two pages match across boundary" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "hello, world"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "hell"
+ const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("hell"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("o, world!"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + try w.append(node.next.?); + + // Search should find a match + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 76, + .y = 22, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 23, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); + + // We shouldn't prune because we don't have enough space + try testing.expectEqual(2, w.meta.len()); +} + +test "SlidingWindow two pages no match prunes first page" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "nope!"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "boo!" + const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\n"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + try s.testWriteString("hello. 
boo!"); + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + try w.append(node.next.?); + + // Search should find nothing + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); + + // We should've pruned our page because the second page + // has enough text to contain our needle. + try testing.expectEqual(1, w.meta.len()); +} + +test "SlidingWindow two pages no match keeps both pages" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "boo!" + const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\n"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + try s.testWriteString("hello. boo!"); + + // Imaginary needle for search. Doesn't match! + var needle_list: std.ArrayList(u8) = .empty; + defer needle_list.deinit(alloc); + try needle_list.appendNTimes(alloc, 'x', first_page_rows * s.pages.cols); + const needle: []const u8 = needle_list.items; + + var w = try SlidingWindow.init(alloc, needle); + defer w.deinit(); + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + try w.append(node.next.?); + + // Search should find nothing + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); + + // No pruning because both pages are needed to fit needle. 
+ try testing.expectEqual(2, w.meta.len()); +} + +test "SlidingWindow single append across circular buffer boundary" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "abc"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("XXXXXXXXXXXXXXXXXXXboo!XXXXX"); + + // We are trying to break a circular buffer boundary so the way we + // do this is to duplicate the data then do a failing search. This + // will cause the first page to be pruned. The next time we append we'll + // put it in the middle of the circ buffer. We assert this so that if + // our implementation changes our test will fail. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + try w.append(node); + { + // No wrap around yet + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len == 0); + } + + // Search non-match, prunes page + try testing.expect(w.next() == null); + try testing.expectEqual(1, w.meta.len()); + + // Change the needle, just needs to be the same length (not a real API) + w.needle = "boo"; + + // Add new page, now wraps + try w.append(node); + { + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len > 0); + } + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 19, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 21, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); +} + +test "SlidingWindow single append match on boundary" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "abcd"); + defer w.deinit(); + + 
var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("o!XXXXXXXXXXXXXXXXXXXbo"); + + // We are trying to break a circular buffer boundary so the way we + // do this is to duplicate the data then do a failing search. This + // will cause the first page to be pruned. The next time we append we'll + // put it in the middle of the circ buffer. We assert this so that if + // our implementation changes our test will fail. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + try w.append(node); + { + // No wrap around yet + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len == 0); + } + + // Search non-match, prunes page + try testing.expect(w.next() == null); + try testing.expectEqual(1, w.meta.len()); + + // Change the needle, just needs to be the same length (not a real API) + w.needle = "boo!"; + + // Add new page, now wraps + try w.append(node); + { + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len > 0); + } + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 21, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 1, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); +} From 8848e98271fc4b3c6b4f76796fd47db00a10b683 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 12 Nov 2025 10:07:21 -0800 Subject: [PATCH 02/10] terminal: search thread boilerplate (does nothing) --- src/terminal/search.zig | 1 + src/terminal/search/Thread.zig | 63 ++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 src/terminal/search/Thread.zig diff --git a/src/terminal/search.zig b/src/terminal/search.zig index 
a043973ff..6782f3e10 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -1,6 +1,7 @@ //! Search functionality for the terminal. pub const PageList = @import("search/pagelist.zig").PageListSearch; +pub const Thread = @import("search/Thread.zig"); test { @import("std").testing.refAllDecls(@This()); diff --git a/src/terminal/search/Thread.zig b/src/terminal/search/Thread.zig new file mode 100644 index 000000000..b9f98a9dc --- /dev/null +++ b/src/terminal/search/Thread.zig @@ -0,0 +1,63 @@ +//! Search thread that handles searching a terminal for a string match. +//! This is expected to run on a dedicated thread to try to prevent too much +//! overhead to other terminal read/write operations. +//! +//! The current architecture of search does acquire global locks for accessing +//! terminal data, so there's still added contention, but we do our best to +//! minimize this by trading off memory usage (copying data to minimize lock +//! time). +pub const Thread = @This(); + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const BlockingQueue = @import("../../datastruct/main.zig").BlockingQueue; + +const log = std.log.scoped(.search_thread); + +/// Allocator used for some state +alloc: Allocator, + +/// The mailbox that can be used to send this thread messages. Note +/// this is a blocking queue so if it is full you will get errors (or block). +mailbox: *Mailbox, + +/// Initialize the thread. This does not START the thread; it only sets up +/// the internal state needed before the caller starts the thread via the +/// threadMain entrypoint. Returns an error if the mailbox cannot be created. +pub fn init(alloc: Allocator) !Thread { + // The mailbox for messaging this thread + const mailbox = try Mailbox.create(alloc); + errdefer mailbox.destroy(alloc); + + return .{ + .alloc = alloc, + .mailbox = mailbox, + }; +} + +/// Clean up the thread. 
This is only safe to call once the thread +/// completes executing; the caller must join prior to this. +pub fn deinit(self: *Thread) void { + // Nothing can possibly access the mailbox anymore, destroy it. + self.mailbox.destroy(self.alloc); +} + +/// The main entrypoint for the thread. +pub fn threadMain(self: *Thread) void { + // Call child function so we can use errors... + self.threadMain_() catch |err| { + // In the future, we should expose this on the thread struct. + log.warn("search thread err={}", .{err}); + }; +} + +fn threadMain_(self: *Thread) !void { + defer log.debug("search thread exited", .{}); + _ = self; +} + +/// The type used for sending messages to the thread. +pub const Mailbox = BlockingQueue(Message, 64); + +/// The messages that can be sent to the thread. +pub const Message = union(enum) {}; From 6439af0afc4e9ec0920c7db739cc73d96a4d71dc Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 12 Nov 2025 10:27:52 -0800 Subject: [PATCH 03/10] terminal: SlidingWindow search to dedicated file --- src/terminal/search.zig | 3 + src/terminal/search/sliding_window.zig | 766 +++++++++++++++++++++++++ 2 files changed, 769 insertions(+) create mode 100644 src/terminal/search/sliding_window.zig diff --git a/src/terminal/search.zig b/src/terminal/search.zig index 6782f3e10..a375c6ece 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -5,4 +5,7 @@ pub const Thread = @import("search/Thread.zig"); test { @import("std").testing.refAllDecls(@This()); + + // Non-public APIs + _ = @import("search/sliding_window.zig"); } diff --git a/src/terminal/search/sliding_window.zig b/src/terminal/search/sliding_window.zig new file mode 100644 index 000000000..732a2d611 --- /dev/null +++ b/src/terminal/search/sliding_window.zig @@ -0,0 +1,766 @@ +const std = @import("std"); +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; +const CircBuf = @import("../../datastruct/main.zig").CircBuf; +const terminal = @import("../main.zig"); 
+const point = terminal.point; +const PageList = terminal.PageList; +const Pin = PageList.Pin; +const Selection = terminal.Selection; +const Screen = terminal.Screen; +const PageFormatter = @import("../formatter.zig").PageFormatter; + +/// Searches page nodes via a sliding window. The sliding window maintains +/// the invariant that data isn't pruned until (1) we've searched it and +/// (2) we've accounted for overlaps across pages to fit the needle. +/// +/// The sliding window is first initialized empty. Pages are then appended +/// in the order to search them. If you're doing a reverse search then the +/// pages should be appended in reverse order and the needle should be +/// reversed. +/// +/// All appends grow the window. The window is only pruned when a search +/// is done (positive or negative match) via `next()`. +/// +/// To avoid unnecessary memory growth, the recommended usage is to +/// call `next()` until it returns null and then `append` the next page +/// and repeat the process. This will always maintain the minimum +/// required memory to search for the needle. +/// +/// The caller is responsible for providing the pages and ensuring they're +/// in the proper order. The SlidingWindow itself doesn't own the pages, but +/// it will contain pointers to them in order to return selections. If any +/// pages become invalid, the caller should clear the sliding window and +/// start over. +pub const SlidingWindow = struct { + /// The allocator to use for all the data within this window. We + /// store this rather than passing it around because it's already + /// part of multiple elements (eg. Meta's CellMap) and we want to + /// ensure we always use a consistent allocator. Additionally, only + /// a small number of sliding windows are expected to be in use + /// at any one time so the memory overhead isn't that large. 
+ alloc: Allocator, + + /// The data buffer is a circular buffer of u8 that contains the + /// encoded page text that we can use to search for the needle. + data: DataBuf, + + /// The meta buffer is a circular buffer that contains the metadata + /// about the pages we're searching. This usually isn't that large + /// so callers must iterate through it to find the offset to map + /// data to meta. + meta: MetaBuf, + + /// Offset into data for our current state. This handles the + /// situation where our search moved through meta[0] but didn't + /// do enough to prune it. + data_offset: usize = 0, + + /// The needle we're searching for. Does not own the memory. + needle: []const u8, + + /// A buffer to store the overlap search data. This is used to search + /// overlaps between pages where the match starts on one page and + /// ends on another. The length is always `needle.len * 2`. + overlap_buf: []u8, + + const DataBuf = CircBuf(u8, 0); + const MetaBuf = CircBuf(Meta, undefined); + const Meta = struct { + node: *PageList.List.Node, + cell_map: std.ArrayList(point.Coordinate), + + pub fn deinit(self: *Meta, alloc: Allocator) void { + self.cell_map.deinit(alloc); + } + }; + + pub fn init( + alloc: Allocator, + needle: []const u8, + ) Allocator.Error!SlidingWindow { + var data = try DataBuf.init(alloc, 0); + errdefer data.deinit(alloc); + + var meta = try MetaBuf.init(alloc, 0); + errdefer meta.deinit(alloc); + + const overlap_buf = try alloc.alloc(u8, needle.len * 2); + errdefer alloc.free(overlap_buf); + + return .{ + .alloc = alloc, + .data = data, + .meta = meta, + .needle = needle, + .overlap_buf = overlap_buf, + }; + } + + pub fn deinit(self: *SlidingWindow) void { + self.alloc.free(self.overlap_buf); + self.data.deinit(self.alloc); + + var meta_it = self.meta.iterator(.forward); + while (meta_it.next()) |meta| meta.deinit(self.alloc); + self.meta.deinit(self.alloc); + } + + /// Clear all data but retain allocated capacity. 
+ pub fn clearAndRetainCapacity(self: *SlidingWindow) void { + var meta_it = self.meta.iterator(.forward); + while (meta_it.next()) |meta| meta.deinit(self.alloc); + self.meta.clear(); + self.data.clear(); + self.data_offset = 0; + } + + /// Search the window for the next occurrence of the needle. As + /// the window moves, the window will prune itself while maintaining + /// the invariant that the window is always big enough to contain + /// the needle. + pub fn next(self: *SlidingWindow) ?Selection { + const slices = slices: { + // If we have less data than the needle then we can't possibly match + const data_len = self.data.len(); + if (data_len < self.needle.len) return null; + + break :slices self.data.getPtrSlice( + self.data_offset, + data_len - self.data_offset, + ); + }; + + // Search the first slice for the needle. + if (std.mem.indexOf(u8, slices[0], self.needle)) |idx| { + return self.selection( + idx, + self.needle.len, + ); + } + + // Search the overlap buffer for the needle. + if (slices[0].len > 0 and slices[1].len > 0) overlap: { + // Get up to needle.len - 1 bytes from each side (as much as + // we can) and store it in the overlap buffer. + const prefix: []const u8 = prefix: { + const len = @min(slices[0].len, self.needle.len - 1); + const idx = slices[0].len - len; + break :prefix slices[0][idx..]; + }; + const suffix: []const u8 = suffix: { + const len = @min(slices[1].len, self.needle.len - 1); + break :suffix slices[1][0..len]; + }; + const overlap_len = prefix.len + suffix.len; + assert(overlap_len <= self.overlap_buf.len); + @memcpy(self.overlap_buf[0..prefix.len], prefix); + @memcpy(self.overlap_buf[prefix.len..overlap_len], suffix); + + // Search the overlap + const idx = std.mem.indexOf( + u8, + self.overlap_buf[0..overlap_len], + self.needle, + ) orelse break :overlap; + + // We found a match in the overlap buffer. We need to map the + // index back to the data buffer in order to get our selection. 
+ return self.selection( + slices[0].len - prefix.len + idx, + self.needle.len, + ); + } + + // Search the last slice for the needle. + if (std.mem.indexOf(u8, slices[1], self.needle)) |idx| { + return self.selection( + slices[0].len + idx, + self.needle.len, + ); + } + + // No match. We keep `needle.len - 1` bytes available to + // handle the future overlap case. + var meta_it = self.meta.iterator(.reverse); + prune: { + var saved: usize = 0; + while (meta_it.next()) |meta| { + const needed = self.needle.len - 1 - saved; + if (meta.cell_map.items.len >= needed) { + // We save up to this meta. We set our data offset + // to exactly where it needs to be to continue + // searching. + self.data_offset = meta.cell_map.items.len - needed; + break; + } + + saved += meta.cell_map.items.len; + } else { + // If we exited the while loop naturally then we + // never got the amount we needed and so there is + // nothing to prune. + assert(saved < self.needle.len - 1); + break :prune; + } + + const prune_count = self.meta.len() - meta_it.idx; + if (prune_count == 0) { + // This can happen if we need to save up to the first + // meta value to retain our window. + break :prune; + } + + // We can now delete all the metas up to but NOT including + // the meta we found through meta_it. + meta_it = self.meta.iterator(.forward); + var prune_data_len: usize = 0; + for (0..prune_count) |_| { + const meta = meta_it.next().?; + prune_data_len += meta.cell_map.items.len; + meta.deinit(self.alloc); + } + self.meta.deleteOldest(prune_count); + self.data.deleteOldest(prune_data_len); + } + + // Our data offset now moves to needle.len - 1 from the end so + // that we can handle the overlap case. + self.data_offset = self.data.len() - self.needle.len + 1; + + self.assertIntegrity(); + return null; + } + + /// Return a selection for the given start and length into the data + /// buffer and also prune the data/meta buffers if possible up to + /// this start index. 
+ /// + /// The start index is assumed to be relative to the offset. i.e. + /// index zero is actually at `self.data[self.data_offset]`. The + /// selection will account for the offset. + fn selection( + self: *SlidingWindow, + start_offset: usize, + len: usize, + ) Selection { + const start = start_offset + self.data_offset; + assert(start < self.data.len()); + assert(start + len <= self.data.len()); + + // meta_consumed is the number of bytes we've consumed in the + // data buffer up to and NOT including the meta where we've + // found our pin. This is important because it tells us the + // amount of data we can safely delete from self.data since + // we can't partially delete a meta block's data. (The partial + // amount is represented by self.data_offset). + var meta_it = self.meta.iterator(.forward); + var meta_consumed: usize = 0; + const tl: Pin = pin(&meta_it, &meta_consumed, start); + + // Store the information required to prune later. We store this + // now because we only want to prune up to our START so we can + // find overlapping matches. + const tl_meta_idx = meta_it.idx - 1; + const tl_meta_consumed = meta_consumed; + + // We have to seek back so that we reinspect our current + // iterator value again in case the start and end are in the + // same segment. + meta_it.seekBy(-1); + const br: Pin = pin(&meta_it, &meta_consumed, start + len - 1); + assert(meta_it.idx >= 1); + + // Our offset into the current meta block is the start index + // minus the amount of data fully consumed. We then add one + // to move one past the match so we don't repeat it. + self.data_offset = start - tl_meta_consumed + 1; + + // tl_meta_idx is the index of the meta block that contains the + // start of the match (meta_it.idx was one past it after the first + // pin() call). If it is greater than zero then the match has + // prior meta blocks we can prune. + if (tl_meta_idx > 0) { + // Deinit all our memory in the meta blocks prior to our + // match. 
+ const meta_count = tl_meta_idx; + meta_it.reset(); + for (0..meta_count) |_| meta_it.next().?.deinit(self.alloc); + if (comptime std.debug.runtime_safety) { + assert(meta_it.idx == meta_count); + assert(meta_it.next().?.node == tl.node); + } + self.meta.deleteOldest(meta_count); + + // Delete all the data up to our current index. + assert(tl_meta_consumed > 0); + self.data.deleteOldest(tl_meta_consumed); + } + + self.assertIntegrity(); + return .init(tl, br, false); + } + + /// Convert a data index into a pin. + /// + /// The iterator and offset are both expected to be passed by + /// pointer so that the pin can be efficiently called for multiple + /// indexes (in order). See selection() for an example. + /// + /// Precondition: the index must be within the data buffer. + fn pin( + it: *MetaBuf.Iterator, + offset: *usize, + idx: usize, + ) Pin { + while (it.next()) |meta| { + // meta_i is the index we expect to find the match in the + // cell map within this meta if it contains it. + const meta_i = idx - offset.*; + if (meta_i >= meta.cell_map.items.len) { + // This meta doesn't contain the match. This means we + // can also prune this set of data because we only look + // forward. + offset.* += meta.cell_map.items.len; + continue; + } + + // We found the meta that contains the start of the match. + const map = meta.cell_map.items[meta_i]; + return .{ + .node = meta.node, + .y = @intCast(map.y), + .x = map.x, + }; + } + + // Unreachable because it is a precondition that the index is + // within the data buffer. + unreachable; + } + + /// Add a new node to the sliding window. This will always grow + /// the sliding window; data isn't pruned until it is consumed + /// via a search (via next()). + pub fn append( + self: *SlidingWindow, + node: *PageList.List.Node, + ) Allocator.Error!void { + // Initialize our metadata for the node. 
+ var meta: Meta = .{ + .node = node, + .cell_map = .empty, + }; + errdefer meta.deinit(self.alloc); + + // This is suboptimal but we need to encode the page once to + // temporary memory, and then copy it into our circular buffer. + // In the future, we should benchmark and see if we can encode + // directly into the circular buffer. + var encoded: std.Io.Writer.Allocating = .init(self.alloc); + defer encoded.deinit(); + + // Encode the page into the buffer. + const formatter: PageFormatter = formatter: { + var formatter: PageFormatter = .init(&meta.node.data, .plain); + formatter.point_map = .{ + .alloc = self.alloc, + .map = &meta.cell_map, + }; + break :formatter formatter; + }; + formatter.format(&encoded.writer) catch { + // writer uses anyerror but the only realistic error on + // an ArrayList is out of memory. + return error.OutOfMemory; + }; + assert(meta.cell_map.items.len == encoded.written().len); + + // Ensure our buffers are big enough to store what we need. + try self.data.ensureUnusedCapacity(self.alloc, encoded.written().len); + try self.meta.ensureUnusedCapacity(self.alloc, 1); + + // Append our new node to the circular buffer. + try self.data.appendSlice(encoded.written()); + try self.meta.append(meta); + + self.assertIntegrity(); + } + + fn assertIntegrity(self: *const SlidingWindow) void { + if (comptime !std.debug.runtime_safety) return; + + // We don't run integrity checks on Valgrind because it's so slow, + // Valgrind is our integrity checker, and we run these during unit + // tests (non-Valgrind) anyways so we're verifying anyways. + if (std.valgrind.runningOnValgrind() > 0) return; + + // Integrity check: verify our data matches our metadata exactly. + var meta_it = self.meta.iterator(.forward); + var data_len: usize = 0; + while (meta_it.next()) |m| data_len += m.cell_map.items.len; + assert(data_len == self.data.len()); + + // Integrity check: verify our data offset is within bounds. 
+ assert(self.data_offset < self.data.len()); + } +}; + +test "SlidingWindow empty on init" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "boo!"); + defer w.deinit(); + try testing.expectEqual(0, w.data.len()); + try testing.expectEqual(0, w.meta.len()); +} + +test "SlidingWindow single append" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "boo!"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello. boo! hello. boo!"); + + // We want to test single-page cases. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + + // We should be able to find two matches. + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 10, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 19, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 22, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); +} + +test "SlidingWindow single append no match" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "nope!"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello. boo! hello. boo!"); + + // We want to test single-page cases. 
+ try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + + // No matches + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); + + // Should still keep the page + try testing.expectEqual(1, w.meta.len()); +} + +test "SlidingWindow two pages" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "boo!"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "boo!" + const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\n"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + try s.testWriteString("hello. 
boo!"); + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + try w.append(node.next.?); + + // Search should find two matches + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 76, + .y = 22, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 79, + .y = 22, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 23, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 10, + .y = 23, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); +} + +test "SlidingWindow two pages match across boundary" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "hello, world"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "hell" 
+ const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("hell"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("o, world!"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + try w.append(node.next.?); + + // Search should find a match + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 76, + .y = 22, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 23, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); + + // We shouldn't prune because we don't have enough space + try testing.expectEqual(2, w.meta.len()); +} + +test "SlidingWindow two pages no match prunes first page" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "nope!"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "boo!" + const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\n"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + try s.testWriteString("hello. 
boo!"); + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + try w.append(node.next.?); + + // Search should find nothing + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); + + // We should've pruned our page because the second page + // has enough text to contain our needle. + try testing.expectEqual(1, w.meta.len()); +} + +test "SlidingWindow two pages no match keeps both pages" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "boo!" + const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\n"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + try s.testWriteString("hello. boo!"); + + // Imaginary needle for search. Doesn't match! + var needle_list: std.ArrayList(u8) = .empty; + defer needle_list.deinit(alloc); + try needle_list.appendNTimes(alloc, 'x', first_page_rows * s.pages.cols); + const needle: []const u8 = needle_list.items; + + var w = try SlidingWindow.init(alloc, needle); + defer w.deinit(); + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + try w.append(node.next.?); + + // Search should find nothing + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); + + // No pruning because both pages are needed to fit needle. 
+ try testing.expectEqual(2, w.meta.len()); +} + +test "SlidingWindow single append across circular buffer boundary" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "abc"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("XXXXXXXXXXXXXXXXXXXboo!XXXXX"); + + // We are trying to break a circular buffer boundary so the way we + // do this is to duplicate the data then do a failing search. This + // will cause the first page to be pruned. The next time we append we'll + // put it in the middle of the circ buffer. We assert this so that if + // our implementation changes our test will fail. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + try w.append(node); + { + // No wrap around yet + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len == 0); + } + + // Search non-match, prunes page + try testing.expect(w.next() == null); + try testing.expectEqual(1, w.meta.len()); + + // Change the needle, just needs to be the same length (not a real API) + w.needle = "boo"; + + // Add new page, now wraps + try w.append(node); + { + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len > 0); + } + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 19, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 21, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); +} + +test "SlidingWindow single append match on boundary" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "abcd"); + defer w.deinit(); + + 
var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("o!XXXXXXXXXXXXXXXXXXXbo"); + + // We are trying to break a circular buffer boundary so the way we + // do this is to duplicate the data then do a failing search. This + // will cause the first page to be pruned. The next time we append we'll + // put it in the middle of the circ buffer. We assert this so that if + // our implementation changes our test will fail. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + try w.append(node); + { + // No wrap around yet + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len == 0); + } + + // Search non-match, prunes page + try testing.expect(w.next() == null); + try testing.expectEqual(1, w.meta.len()); + + // Change the needle, just needs to be the same length (not a real API) + w.needle = "boo!"; + + // Add new page, now wraps + try w.append(node); + { + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len > 0); + } + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 21, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 1, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); +} From 43835d146878964c7493590924b7465e9b7b46b9 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 12 Nov 2025 11:22:56 -0800 Subject: [PATCH 04/10] terminal: SlidingWindow supports forward/reverse directions --- src/terminal/search/pagelist.zig | 797 +------------------------ src/terminal/search/sliding_window.zig | 434 +++++++++++++- 2 files changed, 419 insertions(+), 812 deletions(-) diff --git a/src/terminal/search/pagelist.zig 
b/src/terminal/search/pagelist.zig index 336b1dfba..cb9d0ee45 100644 --- a/src/terminal/search/pagelist.zig +++ b/src/terminal/search/pagelist.zig @@ -33,6 +33,7 @@ const Pin = PageList.Pin; const Selection = terminal.Selection; const Screen = terminal.Screen; const PageFormatter = @import("../formatter.zig").PageFormatter; +const SlidingWindow = @import("sliding_window.zig").SlidingWindow; /// Searches for a term in a PageList structure. /// @@ -46,16 +47,14 @@ pub const PageListSearch = struct { /// The sliding window of page contents and nodes to search. window: SlidingWindow, - /// Initialize the page list search. - /// - /// The needle is not copied and must be kept alive for the duration - /// of the search operation. + /// Initialize the page list search. The needle is copied so it can + /// be freed immediately. pub fn init( alloc: Allocator, list: *PageList, needle: []const u8, ) Allocator.Error!PageListSearch { - var window = try SlidingWindow.init(alloc, needle); + var window: SlidingWindow = try .init(alloc, .forward, needle); errdefer window.deinit(); return .{ @@ -95,791 +94,3 @@ pub const PageListSearch = struct { return null; } }; - -/// Searches page nodes via a sliding window. The sliding window maintains -/// the invariant that data isn't pruned until (1) we've searched it and -/// (2) we've accounted for overlaps across pages to fit the needle. -/// -/// The sliding window is first initialized empty. Pages are then appended -/// in the order to search them. If you're doing a reverse search then the -/// pages should be appended in reverse order and the needle should be -/// reversed. -/// -/// All appends grow the window. The window is only pruned when a searc -/// is done (positive or negative match) via `next()`. -/// -/// To avoid unnecessary memory growth, the recommended usage is to -/// call `next()` until it returns null and then `append` the next page -/// and repeat the process. 
This will always maintain the minimum -/// required memory to search for the needle. -const SlidingWindow = struct { - /// The allocator to use for all the data within this window. We - /// store this rather than passing it around because its already - /// part of multiple elements (eg. Meta's CellMap) and we want to - /// ensure we always use a consistent allocator. Additionally, only - /// a small amount of sliding windows are expected to be in use - /// at any one time so the memory overhead isn't that large. - alloc: Allocator, - - /// The data buffer is a circular buffer of u8 that contains the - /// encoded page text that we can use to search for the needle. - data: DataBuf, - - /// The meta buffer is a circular buffer that contains the metadata - /// about the pages we're searching. This usually isn't that large - /// so callers must iterate through it to find the offset to map - /// data to meta. - meta: MetaBuf, - - /// Offset into data for our current state. This handles the - /// situation where our search moved through meta[0] but didn't - /// do enough to prune it. - data_offset: usize = 0, - - /// The needle we're searching for. Does not own the memory. - needle: []const u8, - - /// A buffer to store the overlap search data. This is used to search - /// overlaps between pages where the match starts on one page and - /// ends on another. The length is always `needle.len * 2`. 
- overlap_buf: []u8, - - const DataBuf = CircBuf(u8, 0); - const MetaBuf = CircBuf(Meta, undefined); - const Meta = struct { - node: *PageList.List.Node, - cell_map: std.ArrayList(point.Coordinate), - - pub fn deinit(self: *Meta, alloc: Allocator) void { - self.cell_map.deinit(alloc); - } - }; - - pub fn init( - alloc: Allocator, - needle: []const u8, - ) Allocator.Error!SlidingWindow { - var data = try DataBuf.init(alloc, 0); - errdefer data.deinit(alloc); - - var meta = try MetaBuf.init(alloc, 0); - errdefer meta.deinit(alloc); - - const overlap_buf = try alloc.alloc(u8, needle.len * 2); - errdefer alloc.free(overlap_buf); - - return .{ - .alloc = alloc, - .data = data, - .meta = meta, - .needle = needle, - .overlap_buf = overlap_buf, - }; - } - - pub fn deinit(self: *SlidingWindow) void { - self.alloc.free(self.overlap_buf); - self.data.deinit(self.alloc); - - var meta_it = self.meta.iterator(.forward); - while (meta_it.next()) |meta| meta.deinit(self.alloc); - self.meta.deinit(self.alloc); - } - - /// Clear all data but retain allocated capacity. - pub fn clearAndRetainCapacity(self: *SlidingWindow) void { - var meta_it = self.meta.iterator(.forward); - while (meta_it.next()) |meta| meta.deinit(self.alloc); - self.meta.clear(); - self.data.clear(); - self.data_offset = 0; - } - - /// Search the window for the next occurrence of the needle. As - /// the window moves, the window will prune itself while maintaining - /// the invariant that the window is always big enough to contain - /// the needle. - pub fn next(self: *SlidingWindow) ?Selection { - const slices = slices: { - // If we have less data then the needle then we can't possibly match - const data_len = self.data.len(); - if (data_len < self.needle.len) return null; - - break :slices self.data.getPtrSlice( - self.data_offset, - data_len - self.data_offset, - ); - }; - - // Search the first slice for the needle. 
- if (std.mem.indexOf(u8, slices[0], self.needle)) |idx| { - return self.selection( - idx, - self.needle.len, - ); - } - - // Search the overlap buffer for the needle. - if (slices[0].len > 0 and slices[1].len > 0) overlap: { - // Get up to needle.len - 1 bytes from each side (as much as - // we can) and store it in the overlap buffer. - const prefix: []const u8 = prefix: { - const len = @min(slices[0].len, self.needle.len - 1); - const idx = slices[0].len - len; - break :prefix slices[0][idx..]; - }; - const suffix: []const u8 = suffix: { - const len = @min(slices[1].len, self.needle.len - 1); - break :suffix slices[1][0..len]; - }; - const overlap_len = prefix.len + suffix.len; - assert(overlap_len <= self.overlap_buf.len); - @memcpy(self.overlap_buf[0..prefix.len], prefix); - @memcpy(self.overlap_buf[prefix.len..overlap_len], suffix); - - // Search the overlap - const idx = std.mem.indexOf( - u8, - self.overlap_buf[0..overlap_len], - self.needle, - ) orelse break :overlap; - - // We found a match in the overlap buffer. We need to map the - // index back to the data buffer in order to get our selection. - return self.selection( - slices[0].len - prefix.len + idx, - self.needle.len, - ); - } - - // Search the last slice for the needle. - if (std.mem.indexOf(u8, slices[1], self.needle)) |idx| { - return self.selection( - slices[0].len + idx, - self.needle.len, - ); - } - - // No match. We keep `needle.len - 1` bytes available to - // handle the future overlap case. - var meta_it = self.meta.iterator(.reverse); - prune: { - var saved: usize = 0; - while (meta_it.next()) |meta| { - const needed = self.needle.len - 1 - saved; - if (meta.cell_map.items.len >= needed) { - // We save up to this meta. We set our data offset - // to exactly where it needs to be to continue - // searching. 
- self.data_offset = meta.cell_map.items.len - needed; - break; - } - - saved += meta.cell_map.items.len; - } else { - // If we exited the while loop naturally then we - // never got the amount we needed and so there is - // nothing to prune. - assert(saved < self.needle.len - 1); - break :prune; - } - - const prune_count = self.meta.len() - meta_it.idx; - if (prune_count == 0) { - // This can happen if we need to save up to the first - // meta value to retain our window. - break :prune; - } - - // We can now delete all the metas up to but NOT including - // the meta we found through meta_it. - meta_it = self.meta.iterator(.forward); - var prune_data_len: usize = 0; - for (0..prune_count) |_| { - const meta = meta_it.next().?; - prune_data_len += meta.cell_map.items.len; - meta.deinit(self.alloc); - } - self.meta.deleteOldest(prune_count); - self.data.deleteOldest(prune_data_len); - } - - // Our data offset now moves to needle.len - 1 from the end so - // that we can handle the overlap case. - self.data_offset = self.data.len() - self.needle.len + 1; - - self.assertIntegrity(); - return null; - } - - /// Return a selection for the given start and length into the data - /// buffer and also prune the data/meta buffers if possible up to - /// this start index. - /// - /// The start index is assumed to be relative to the offset. i.e. - /// index zero is actually at `self.data[self.data_offset]`. The - /// selection will account for the offset. - fn selection( - self: *SlidingWindow, - start_offset: usize, - len: usize, - ) Selection { - const start = start_offset + self.data_offset; - assert(start < self.data.len()); - assert(start + len <= self.data.len()); - - // meta_consumed is the number of bytes we've consumed in the - // data buffer up to and NOT including the meta where we've - // found our pin. This is important because it tells us the - // amount of data we can safely deleted from self.data since - // we can't partially delete a meta block's data. 
(The partial - // amount is represented by self.data_offset). - var meta_it = self.meta.iterator(.forward); - var meta_consumed: usize = 0; - const tl: Pin = pin(&meta_it, &meta_consumed, start); - - // Store the information required to prune later. We store this - // now because we only want to prune up to our START so we can - // find overlapping matches. - const tl_meta_idx = meta_it.idx - 1; - const tl_meta_consumed = meta_consumed; - - // We have to seek back so that we reinspect our current - // iterator value again in case the start and end are in the - // same segment. - meta_it.seekBy(-1); - const br: Pin = pin(&meta_it, &meta_consumed, start + len - 1); - assert(meta_it.idx >= 1); - - // Our offset into the current meta block is the start index - // minus the amount of data fully consumed. We then add one - // to move one past the match so we don't repeat it. - self.data_offset = start - tl_meta_consumed + 1; - - // meta_it.idx is br's meta index plus one (because the iterator - // moves one past the end; we call next() one last time). So - // we compare against one to check that the meta that we matched - // in has prior meta blocks we can prune. - if (tl_meta_idx > 0) { - // Deinit all our memory in the meta blocks prior to our - // match. - const meta_count = tl_meta_idx; - meta_it.reset(); - for (0..meta_count) |_| meta_it.next().?.deinit(self.alloc); - if (comptime std.debug.runtime_safety) { - assert(meta_it.idx == meta_count); - assert(meta_it.next().?.node == tl.node); - } - self.meta.deleteOldest(meta_count); - - // Delete all the data up to our current index. - assert(tl_meta_consumed > 0); - self.data.deleteOldest(tl_meta_consumed); - } - - self.assertIntegrity(); - return .init(tl, br, false); - } - - /// Convert a data index into a pin. - /// - /// The iterator and offset are both expected to be passed by - /// pointer so that the pin can be efficiently called for multiple - /// indexes (in order). See selection() for an example. 
- /// - /// Precondition: the index must be within the data buffer. - fn pin( - it: *MetaBuf.Iterator, - offset: *usize, - idx: usize, - ) Pin { - while (it.next()) |meta| { - // meta_i is the index we expect to find the match in the - // cell map within this meta if it contains it. - const meta_i = idx - offset.*; - if (meta_i >= meta.cell_map.items.len) { - // This meta doesn't contain the match. This means we - // can also prune this set of data because we only look - // forward. - offset.* += meta.cell_map.items.len; - continue; - } - - // We found the meta that contains the start of the match. - const map = meta.cell_map.items[meta_i]; - return .{ - .node = meta.node, - .y = @intCast(map.y), - .x = map.x, - }; - } - - // Unreachable because it is a precondition that the index is - // within the data buffer. - unreachable; - } - - /// Add a new node to the sliding window. This will always grow - /// the sliding window; data isn't pruned until it is consumed - /// via a search (via next()). - pub fn append( - self: *SlidingWindow, - node: *PageList.List.Node, - ) Allocator.Error!void { - // Initialize our metadata for the node. - var meta: Meta = .{ - .node = node, - .cell_map = .empty, - }; - errdefer meta.deinit(self.alloc); - - // This is suboptimal but we need to encode the page once to - // temporary memory, and then copy it into our circular buffer. - // In the future, we should benchmark and see if we can encode - // directly into the circular buffer. - var encoded: std.Io.Writer.Allocating = .init(self.alloc); - defer encoded.deinit(); - - // Encode the page into the buffer. - const formatter: PageFormatter = formatter: { - var formatter: PageFormatter = .init(&meta.node.data, .plain); - formatter.point_map = .{ - .alloc = self.alloc, - .map = &meta.cell_map, - }; - break :formatter formatter; - }; - formatter.format(&encoded.writer) catch { - // writer uses anyerror but the only realistic error on - // an ArrayList is out of memory. 
- return error.OutOfMemory; - }; - assert(meta.cell_map.items.len == encoded.written().len); - - // Ensure our buffers are big enough to store what we need. - try self.data.ensureUnusedCapacity(self.alloc, encoded.written().len); - try self.meta.ensureUnusedCapacity(self.alloc, 1); - - // Append our new node to the circular buffer. - try self.data.appendSlice(encoded.written()); - try self.meta.append(meta); - - self.assertIntegrity(); - } - - fn assertIntegrity(self: *const SlidingWindow) void { - if (comptime !std.debug.runtime_safety) return; - - // We don't run integrity checks on Valgrind because its soooooo slow, - // Valgrind is our integrity checker, and we run these during unit - // tests (non-Valgrind) anyways so we're verifying anyways. - if (std.valgrind.runningOnValgrind() > 0) return; - - // Integrity check: verify our data matches our metadata exactly. - var meta_it = self.meta.iterator(.forward); - var data_len: usize = 0; - while (meta_it.next()) |m| data_len += m.cell_map.items.len; - assert(data_len == self.data.len()); - - // Integrity check: verify our data offset is within bounds. - assert(self.data_offset < self.data.len()); - } -}; - -test "PageListSearch single page" { - const testing = std.testing; - const alloc = testing.allocator; - - var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("hello. boo! hello. boo!"); - try testing.expect(s.pages.pages.first == s.pages.pages.last); - - var search = try PageListSearch.init(alloc, &s.pages, "boo!"); - defer search.deinit(); - - // We should be able to find two matches. 
- { - const sel = (try search.next()).?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 7, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 10, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - { - const sel = (try search.next()).?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 19, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 22, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - try testing.expect((try search.next()) == null); - try testing.expect((try search.next()) == null); -} - -test "SlidingWindow empty on init" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "boo!"); - defer w.deinit(); - try testing.expectEqual(0, w.data.len()); - try testing.expectEqual(0, w.meta.len()); -} - -test "SlidingWindow single append" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "boo!"); - defer w.deinit(); - - var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("hello. boo! hello. boo!"); - - // We want to test single-page cases. - try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - - // We should be able to find two matches. 
- { - const sel = w.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 7, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 10, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - { - const sel = w.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 19, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 22, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - try testing.expect(w.next() == null); - try testing.expect(w.next() == null); -} - -test "SlidingWindow single append no match" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "nope!"); - defer w.deinit(); - - var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("hello. boo! hello. boo!"); - - // We want to test single-page cases. - try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - - // No matches - try testing.expect(w.next() == null); - try testing.expect(w.next() == null); - - // Should still keep the page - try testing.expectEqual(1, w.meta.len()); -} - -test "SlidingWindow two pages" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "boo!"); - defer w.deinit(); - - var s = try Screen.init(alloc, 80, 24, 1000); - defer s.deinit(); - - // Fill up the first page. The final bytes in the first page - // are "boo!" 
- const first_page_rows = s.pages.pages.first.?.data.capacity.rows; - for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); - for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); - try s.testWriteString("boo!"); - try testing.expect(s.pages.pages.first == s.pages.pages.last); - try s.testWriteString("\n"); - try testing.expect(s.pages.pages.first != s.pages.pages.last); - try s.testWriteString("hello. boo!"); - - // Add both pages - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node.next.?); - - // Search should find two matches - { - const sel = w.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 76, - .y = 22, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 79, - .y = 22, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - { - const sel = w.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 7, - .y = 23, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 10, - .y = 23, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - try testing.expect(w.next() == null); - try testing.expect(w.next() == null); -} - -test "SlidingWindow two pages match across boundary" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "hello, world"); - defer w.deinit(); - - var s = try Screen.init(alloc, 80, 24, 1000); - defer s.deinit(); - - // Fill up the first page. The final bytes in the first page - // are "boo!" 
- const first_page_rows = s.pages.pages.first.?.data.capacity.rows; - for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); - for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); - try s.testWriteString("hell"); - try testing.expect(s.pages.pages.first == s.pages.pages.last); - try s.testWriteString("o, world!"); - try testing.expect(s.pages.pages.first != s.pages.pages.last); - - // Add both pages - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node.next.?); - - // Search should find a match - { - const sel = w.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 76, - .y = 22, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 7, - .y = 23, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - try testing.expect(w.next() == null); - try testing.expect(w.next() == null); - - // We shouldn't prune because we don't have enough space - try testing.expectEqual(2, w.meta.len()); -} - -test "SlidingWindow two pages no match prunes first page" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "nope!"); - defer w.deinit(); - - var s = try Screen.init(alloc, 80, 24, 1000); - defer s.deinit(); - - // Fill up the first page. The final bytes in the first page - // are "boo!" - const first_page_rows = s.pages.pages.first.?.data.capacity.rows; - for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); - for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); - try s.testWriteString("boo!"); - try testing.expect(s.pages.pages.first == s.pages.pages.last); - try s.testWriteString("\n"); - try testing.expect(s.pages.pages.first != s.pages.pages.last); - try s.testWriteString("hello. 
boo!"); - - // Add both pages - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node.next.?); - - // Search should find nothing - try testing.expect(w.next() == null); - try testing.expect(w.next() == null); - - // We should've pruned our page because the second page - // has enough text to contain our needle. - try testing.expectEqual(1, w.meta.len()); -} - -test "SlidingWindow two pages no match keeps both pages" { - const testing = std.testing; - const alloc = testing.allocator; - - var s = try Screen.init(alloc, 80, 24, 1000); - defer s.deinit(); - - // Fill up the first page. The final bytes in the first page - // are "boo!" - const first_page_rows = s.pages.pages.first.?.data.capacity.rows; - for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); - for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); - try s.testWriteString("boo!"); - try testing.expect(s.pages.pages.first == s.pages.pages.last); - try s.testWriteString("\n"); - try testing.expect(s.pages.pages.first != s.pages.pages.last); - try s.testWriteString("hello. boo!"); - - // Imaginary needle for search. Doesn't match! - var needle_list: std.ArrayList(u8) = .empty; - defer needle_list.deinit(alloc); - try needle_list.appendNTimes(alloc, 'x', first_page_rows * s.pages.cols); - const needle: []const u8 = needle_list.items; - - var w = try SlidingWindow.init(alloc, needle); - defer w.deinit(); - - // Add both pages - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node.next.?); - - // Search should find nothing - try testing.expect(w.next() == null); - try testing.expect(w.next() == null); - - // No pruning because both pages are needed to fit needle. 
- try testing.expectEqual(2, w.meta.len()); -} - -test "SlidingWindow single append across circular buffer boundary" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "abc"); - defer w.deinit(); - - var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("XXXXXXXXXXXXXXXXXXXboo!XXXXX"); - - // We are trying to break a circular buffer boundary so the way we - // do this is to duplicate the data then do a failing search. This - // will cause the first page to be pruned. The next time we append we'll - // put it in the middle of the circ buffer. We assert this so that if - // our implementation changes our test will fail. - try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node); - { - // No wrap around yet - const slices = w.data.getPtrSlice(0, w.data.len()); - try testing.expect(slices[0].len > 0); - try testing.expect(slices[1].len == 0); - } - - // Search non-match, prunes page - try testing.expect(w.next() == null); - try testing.expectEqual(1, w.meta.len()); - - // Change the needle, just needs to be the same length (not a real API) - w.needle = "boo"; - - // Add new page, now wraps - try w.append(node); - { - const slices = w.data.getPtrSlice(0, w.data.len()); - try testing.expect(slices[0].len > 0); - try testing.expect(slices[1].len > 0); - } - { - const sel = w.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 19, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 21, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - try testing.expect(w.next() == null); -} - -test "SlidingWindow single append match on boundary" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.init(alloc, "abcd"); - defer w.deinit(); - - 
var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("o!XXXXXXXXXXXXXXXXXXXbo"); - - // We are trying to break a circular buffer boundary so the way we - // do this is to duplicate the data then do a failing search. This - // will cause the first page to be pruned. The next time we append we'll - // put it in the middle of the circ buffer. We assert this so that if - // our implementation changes our test will fail. - try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node); - { - // No wrap around yet - const slices = w.data.getPtrSlice(0, w.data.len()); - try testing.expect(slices[0].len > 0); - try testing.expect(slices[1].len == 0); - } - - // Search non-match, prunes page - try testing.expect(w.next() == null); - try testing.expectEqual(1, w.meta.len()); - - // Change the needle, just needs to be the same length (not a real API) - w.needle = "boo!"; - - // Add new page, now wraps - try w.append(node); - { - const slices = w.data.getPtrSlice(0, w.data.len()); - try testing.expect(slices[0].len > 0); - try testing.expect(slices[1].len > 0); - } - { - const sel = w.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 21, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 1, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - try testing.expect(w.next() == null); -} diff --git a/src/terminal/search/sliding_window.zig b/src/terminal/search/sliding_window.zig index 732a2d611..f27299db2 100644 --- a/src/terminal/search/sliding_window.zig +++ b/src/terminal/search/sliding_window.zig @@ -15,9 +15,9 @@ const PageFormatter = @import("../formatter.zig").PageFormatter; /// (2) we've accounted for overlaps across pages to fit the needle. /// /// The sliding window is first initialized empty. 
Pages are then appended -/// in the order to search them. If you're doing a reverse search then the -/// pages should be appended in reverse order and the needle should be -/// reversed. +/// in the order to search them. The sliding window supports both a forward +/// and reverse order specified via `init`. The pages should be appended +/// in the correct order matching the search direction. /// /// All appends grow the window. The window is only pruned when a search /// is done (positive or negative match) via `next()`. @@ -56,14 +56,27 @@ pub const SlidingWindow = struct { /// do enough to prune it. data_offset: usize = 0, - /// The needle we're searching for. Does not own the memory. + /// The needle we're searching for. Owned by the window: `init` copies the caller's needle (byte-reversed for reverse searches) and `deinit` frees it. needle: []const u8, + /// The search direction. If the direction is forward then pages should + /// be appended in forward linked list order from the PageList. If the + /// direction is reverse then pages should be appended in reverse order. + /// + /// This is important because in most cases, a reverse search is going + /// to be more desirable to search from the end of the active area + /// backwards so more recent data is found first. Supporting both is + /// trivial though and will let us do more complex optimizations in the + /// future (e.g. starting from the viewport and doing a forward/reverse + /// concurrently from that point). + direction: Direction, + /// A buffer to store the overlap search data. This is used to search /// overlaps between pages where the match starts on one page and /// ends on another. The length is always `needle.len * 2`.
overlap_buf: []u8, + const Direction = enum { forward, reverse }; const DataBuf = CircBuf(u8, 0); const MetaBuf = CircBuf(Meta, undefined); const Meta = struct { @@ -77,7 +90,8 @@ pub const SlidingWindow = struct { pub fn init( alloc: Allocator, - needle: []const u8, + direction: Direction, + needle_unowned: []const u8, ) Allocator.Error!SlidingWindow { var data = try DataBuf.init(alloc, 0); errdefer data.deinit(alloc); @@ -85,6 +99,13 @@ pub const SlidingWindow = struct { var meta = try MetaBuf.init(alloc, 0); errdefer meta.deinit(alloc); + const needle = try alloc.dupe(u8, needle_unowned); + errdefer alloc.free(needle); + switch (direction) { + .forward => {}, + .reverse => std.mem.reverse(u8, needle), + } + const overlap_buf = try alloc.alloc(u8, needle.len * 2); errdefer alloc.free(overlap_buf); @@ -93,12 +114,14 @@ pub const SlidingWindow = struct { .data = data, .meta = meta, .needle = needle, + .direction = direction, .overlap_buf = overlap_buf, }; } pub fn deinit(self: *SlidingWindow) void { self.alloc.free(self.overlap_buf); + self.alloc.free(self.needle); self.data.deinit(self.alloc); var meta_it = self.meta.iterator(.forward); @@ -298,7 +321,10 @@ pub const SlidingWindow = struct { } self.assertIntegrity(); - return .init(tl, br, false); + return switch (self.direction) { + .forward => .init(tl, br, false), + .reverse => .init(br, tl, false), + }; } /// Convert a data index into a pin. @@ -376,17 +402,35 @@ pub const SlidingWindow = struct { }; assert(meta.cell_map.items.len == encoded.written().len); + // Get our written data. If we're doing a reverse search then we + // need to reverse all our encodings. + const written = encoded.written(); + switch (self.direction) { + .forward => {}, + .reverse => { + std.mem.reverse(u8, written); + std.mem.reverse(point.Coordinate, meta.cell_map.items); + }, + } + // Ensure our buffers are big enough to store what we need. 
- try self.data.ensureUnusedCapacity(self.alloc, encoded.written().len); + try self.data.ensureUnusedCapacity(self.alloc, written.len); try self.meta.ensureUnusedCapacity(self.alloc, 1); // Append our new node to the circular buffer. - try self.data.appendSlice(encoded.written()); + try self.data.appendSlice(written); try self.meta.append(meta); self.assertIntegrity(); } + /// Only for tests! + fn testChangeNeedle(self: *SlidingWindow, new: []const u8) void { + assert(new.len == self.needle.len); + self.alloc.free(self.needle); + self.needle = self.alloc.dupe(u8, new) catch unreachable; + } + fn assertIntegrity(self: *const SlidingWindow) void { if (comptime !std.debug.runtime_safety) return; @@ -410,7 +454,7 @@ test "SlidingWindow empty on init" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.init(alloc, "boo!"); + var w: SlidingWindow = try .init(alloc, .forward, "boo!"); defer w.deinit(); try testing.expectEqual(0, w.data.len()); try testing.expectEqual(0, w.meta.len()); @@ -420,7 +464,7 @@ test "SlidingWindow single append" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.init(alloc, "boo!"); + var w: SlidingWindow = try .init(alloc, .forward, "boo!"); defer w.deinit(); var s = try Screen.init(alloc, 80, 24, 0); @@ -463,7 +507,7 @@ test "SlidingWindow single append no match" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.init(alloc, "nope!"); + var w: SlidingWindow = try .init(alloc, .forward, "nope!"); defer w.deinit(); var s = try Screen.init(alloc, 80, 24, 0); @@ -487,7 +531,7 @@ test "SlidingWindow two pages" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.init(alloc, "boo!"); + var w: SlidingWindow = try .init(alloc, .forward, "boo!"); defer w.deinit(); var s = try Screen.init(alloc, 80, 24, 1000); @@ -540,7 +584,7 @@ test "SlidingWindow two pages match across boundary" { const 
testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.init(alloc, "hello, world"); + var w: SlidingWindow = try .init(alloc, .forward, "hello, world"); defer w.deinit(); var s = try Screen.init(alloc, 80, 24, 1000); @@ -584,7 +628,7 @@ test "SlidingWindow two pages no match prunes first page" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.init(alloc, "nope!"); + var w: SlidingWindow = try .init(alloc, .forward, "nope!"); defer w.deinit(); var s = try Screen.init(alloc, 80, 24, 1000); @@ -639,7 +683,7 @@ test "SlidingWindow two pages no match keeps both pages" { try needle_list.appendNTimes(alloc, 'x', first_page_rows * s.pages.cols); const needle: []const u8 = needle_list.items; - var w = try SlidingWindow.init(alloc, needle); + var w: SlidingWindow = try .init(alloc, .forward, needle); defer w.deinit(); // Add both pages @@ -659,7 +703,7 @@ test "SlidingWindow single append across circular buffer boundary" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.init(alloc, "abc"); + var w: SlidingWindow = try .init(alloc, .forward, "abc"); defer w.deinit(); var s = try Screen.init(alloc, 80, 24, 0); @@ -687,7 +731,7 @@ test "SlidingWindow single append across circular buffer boundary" { try testing.expectEqual(1, w.meta.len()); // Change the needle, just needs to be the same length (not a real API) - w.needle = "boo"; + w.testChangeNeedle("boo"); // Add new page, now wraps try w.append(node); @@ -714,7 +758,7 @@ test "SlidingWindow single append match on boundary" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.init(alloc, "abcd"); + var w: SlidingWindow = try .init(alloc, .forward, "abcd"); defer w.deinit(); var s = try Screen.init(alloc, 80, 24, 0); @@ -742,7 +786,359 @@ test "SlidingWindow single append match on boundary" { try testing.expectEqual(1, w.meta.len()); // Change the needle, just needs to be the 
same length (not a real API) - w.needle = "boo!"; + w.testChangeNeedle("boo!"); + + // Add new page, now wraps + try w.append(node); + { + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len > 0); + } + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 21, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 1, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); +} + +test "SlidingWindow single append reversed" { + const testing = std.testing; + const alloc = testing.allocator; + + var w: SlidingWindow = try .init(alloc, .reverse, "boo!"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello. boo! hello. boo!"); + + // We want to test single-page cases. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + + // We should be able to find two matches. 
+ { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 19, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 22, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 10, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); +} + +test "SlidingWindow single append no match reversed" { + const testing = std.testing; + const alloc = testing.allocator; + + var w: SlidingWindow = try .init(alloc, .reverse, "nope!"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello. boo! hello. boo!"); + + // We want to test single-page cases. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + + // No matches + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); + + // Should still keep the page + try testing.expectEqual(1, w.meta.len()); +} + +test "SlidingWindow two pages reversed" { + const testing = std.testing; + const alloc = testing.allocator; + + var w: SlidingWindow = try .init(alloc, .reverse, "boo!"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "boo!" 
+ const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\n"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + try s.testWriteString("hello. boo!"); + + // Add both pages in reverse order + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node.next.?); + try w.append(node); + + // Search should find two matches (in reverse order) + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 23, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 10, + .y = 23, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 76, + .y = 22, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 79, + .y = 22, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); +} + +test "SlidingWindow two pages match across boundary reversed" { + const testing = std.testing; + const alloc = testing.allocator; + + var w: SlidingWindow = try .init(alloc, .reverse, "hello, world"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. 
The final bytes in the first page + // are "hell" + const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("hell"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("o, world!"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + + // Add both pages in reverse order + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node.next.?); + try w.append(node); + + // Search should find a match + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 76, + .y = 22, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 23, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); + + // In reverse mode, the last appended meta (first original page) is large + // enough to contain needle.len - 1 bytes, so pruning occurs + try testing.expectEqual(1, w.meta.len()); +} + +test "SlidingWindow two pages no match prunes first page reversed" { + const testing = std.testing; + const alloc = testing.allocator; + + var w: SlidingWindow = try .init(alloc, .reverse, "nope!"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "boo!" + const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\n"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + try s.testWriteString("hello. 
boo!"); + + // Add both pages in reverse order + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node.next.?); + try w.append(node); + + // Search should find nothing + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); + + // We should've pruned our page because the second page + // has enough text to contain our needle. + try testing.expectEqual(1, w.meta.len()); +} + +test "SlidingWindow two pages no match keeps both pages reversed" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "boo!" + const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\n"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + try s.testWriteString("hello. boo!"); + + // Imaginary needle for search. Doesn't match! + var needle_list: std.ArrayList(u8) = .empty; + defer needle_list.deinit(alloc); + try needle_list.appendNTimes(alloc, 'x', first_page_rows * s.pages.cols); + const needle: []const u8 = needle_list.items; + + var w: SlidingWindow = try .init(alloc, .reverse, needle); + defer w.deinit(); + + // Add both pages in reverse order + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node.next.?); + try w.append(node); + + // Search should find nothing + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); + + // No pruning because both pages are needed to fit needle. 
+ try testing.expectEqual(2, w.meta.len()); +} + +test "SlidingWindow single append across circular buffer boundary reversed" { + const testing = std.testing; + const alloc = testing.allocator; + + var w: SlidingWindow = try .init(alloc, .reverse, "abc"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("XXXXXXXXXXXXXXXXXXXboo!XXXXX"); + + // We are trying to break a circular buffer boundary so the way we + // do this is to duplicate the data then do a failing search. This + // will cause the first page to be pruned. The next time we append we'll + // put it in the middle of the circ buffer. We assert this so that if + // our implementation changes our test will fail. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + try w.append(node); + { + // No wrap around yet + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len == 0); + } + + // Search non-match, prunes page + try testing.expect(w.next() == null); + try testing.expectEqual(1, w.meta.len()); + + // Change the needle, just needs to be the same length (not a real API) + // testChangeNeedle doesn't reverse, so pass reversed needle for reverse mode + w.testChangeNeedle("oob"); + + // Add new page, now wraps + try w.append(node); + { + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len > 0); + } + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 19, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 21, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); +} + +test "SlidingWindow single append match on boundary reversed" { + const testing = 
std.testing; + const alloc = testing.allocator; + + var w: SlidingWindow = try .init(alloc, .reverse, "abcd"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("o!XXXXXXXXXXXXXXXXXXXbo"); + + // We are trying to break a circular buffer boundary so the way we + // do this is to duplicate the data then do a failing search. This + // will cause the first page to be pruned. The next time we append we'll + // put it in the middle of the circ buffer. We assert this so that if + // our implementation changes our test will fail. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(node); + try w.append(node); + { + // No wrap around yet + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len == 0); + } + + // Search non-match, prunes page + try testing.expect(w.next() == null); + try testing.expectEqual(1, w.meta.len()); + + // Change the needle, just needs to be the same length (not a real API) + // testChangeNeedle doesn't reverse, so pass reversed needle for reverse mode + w.testChangeNeedle("!oob"); // Add new page, now wraps try w.append(node); From 0ea350a8f28eea5567ef8d3191599f549cc5d7c0 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 12 Nov 2025 10:27:52 -0800 Subject: [PATCH 05/10] terminal: ActiveSearch for searching the active area --- src/terminal/search.zig | 1 + src/terminal/search/active.zig | 168 +++++++++++++++++++++++++ src/terminal/search/sliding_window.zig | 79 +++++++----- 3 files changed, 215 insertions(+), 33 deletions(-) create mode 100644 src/terminal/search/active.zig diff --git a/src/terminal/search.zig b/src/terminal/search.zig index a375c6ece..724b5c171 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -1,5 +1,6 @@ //! Search functionality for the terminal. 
+pub const Active = @import("search/active.zig").ActiveSearch; pub const PageList = @import("search/pagelist.zig").PageListSearch; pub const Thread = @import("search/Thread.zig"); diff --git a/src/terminal/search/active.zig b/src/terminal/search/active.zig new file mode 100644 index 000000000..b682c6df3 --- /dev/null +++ b/src/terminal/search/active.zig @@ -0,0 +1,168 @@ +const std = @import("std"); +const testing = std.testing; +const Allocator = std.mem.Allocator; +const point = @import("../point.zig"); +const size = @import("../size.zig"); +const PageList = @import("../PageList.zig"); +const Selection = @import("../Selection.zig"); +const SlidingWindow = @import("sliding_window.zig").SlidingWindow; +const Terminal = @import("../Terminal.zig"); + +/// Searches for a substring within the active area of a PageList. +/// +/// The distinction for "active area" is important because it is the +/// only part of a PageList that is mutable. Therefore, it's the only part +/// of the terminal that needs to be repeatedly searched as the contents +/// change. +/// +/// This struct specializes in searching only within that active area, +/// and handling the active area moving as new lines are added to the bottom. +pub const ActiveSearch = struct { + window: SlidingWindow, + + pub fn init( + alloc: Allocator, + needle: []const u8, + ) Allocator.Error!ActiveSearch { + // We just do a forward search since the active area is usually + // pretty small so search results are instant anyways. This avoids + // a small amount of work to reverse things. + var window: SlidingWindow = try .init(alloc, .forward, needle); + errdefer window.deinit(); + return .{ .window = window }; + } + + pub fn deinit(self: *ActiveSearch) void { + self.window.deinit(); + } + + /// Update the active area to reflect the current state of the PageList. + /// + /// This doesn't do the search, it only copies the necessary data + /// to perform the search later.
This lets the caller hold the lock + /// on the PageList for a minimal amount of time. + /// + /// This returns the first page (in reverse order) NOT searched by + /// this active area. This is useful for callers that want to follow up + /// with populating the scrollback searcher. The scrollback searcher + /// should start searching from the returned page backwards. + /// + /// If the return value is null it means the active area covers the entire + /// PageList, currently. + pub fn update( + self: *ActiveSearch, + list: *const PageList, + ) Allocator.Error!?*PageList.List.Node { + // Clear our previous sliding window + self.window.clearAndRetainCapacity(); + + // First up, add enough pages to cover the active area, walking backwards from the last page. NOTE(review): `init` creates a `.forward` window; confirm this append order for multi-page active areas. + var rem: usize = list.rows; + var node_ = list.pages.last; + while (node_) |node| : (node_ = node.prev) { + _ = try self.window.append(node); + + // If we reached our target amount, then this is the last + // page that contains the active area. We go to the previous + // page once more since it's the first page of our required + // overlap. + if (rem <= node.data.size.rows) { + node_ = node.prev; + break; + } + + rem -= node.data.size.rows; + } + + // Next, add enough overlap to cover needle.len - 1 bytes (if it + // exists) so we can cover the overlap. NOTE(review): assumes a non-empty needle; `needle.len - 1` underflows when the needle is empty. + rem = self.window.needle.len - 1; + while (node_) |node| : (node_ = node.prev) { + const added = try self.window.append(node); + if (added >= rem) { + node_ = node.prev; + break; + } + rem -= added; + } + + // Return the first page NOT covered by the active area. + return node_; + } + + /// Find the next match for the needle in the active area. This returns + /// null when there are no more matches.
+ pub fn next(self: *ActiveSearch) ?Selection { + return self.window.next(); + } +}; + +test "simple search" { + const alloc = testing.allocator; + var t: Terminal = try .init(alloc, .{ .cols = 10, .rows = 10 }); + defer t.deinit(alloc); + + var s = t.vtStream(); + defer s.deinit(); + try s.nextSlice("Fizz\r\nBuzz\r\nFizz\r\nBang"); + + var search: ActiveSearch = try .init(alloc, "Fizz"); + defer search.deinit(); + _ = try search.update(&t.screen.pages); + + { + const sel = search.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 0, + .y = 0, + } }, t.screen.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 3, + .y = 0, + } }, t.screen.pages.pointFromPin(.active, sel.end()).?); + } + { + const sel = search.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 0, + .y = 2, + } }, t.screen.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 3, + .y = 2, + } }, t.screen.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(search.next() == null); +} + +test "clear screen and search" { + const alloc = testing.allocator; + var t: Terminal = try .init(alloc, .{ .cols = 10, .rows = 10 }); + defer t.deinit(alloc); + + var s = t.vtStream(); + defer s.deinit(); + try s.nextSlice("Fizz\r\nBuzz\r\nFizz\r\nBang"); + + var search: ActiveSearch = try .init(alloc, "Fizz"); + defer search.deinit(); + _ = try search.update(&t.screen.pages); + + try s.nextSlice("\x1b[2J"); // Clear screen + try s.nextSlice("\x1b[H"); // Move cursor home + try s.nextSlice("Buzz\r\nFizz\r\nBuzz"); + _ = try search.update(&t.screen.pages); + + { + const sel = search.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 0, + .y = 1, + } }, t.screen.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 3, + .y = 1, + } }, t.screen.pages.pointFromPin(.active, sel.end()).?); + } + try 
testing.expect(search.next() == null); +} diff --git a/src/terminal/search/sliding_window.zig b/src/terminal/search/sliding_window.zig index f27299db2..29c612691 100644 --- a/src/terminal/search/sliding_window.zig +++ b/src/terminal/search/sliding_window.zig @@ -142,6 +142,14 @@ pub const SlidingWindow = struct { /// the window moves, the window will prune itself while maintaining /// the invariant that the window is always big enough to contain /// the needle. + /// + /// It may seem wasteful to return a full selection, since the needle + /// length is known it seems like we can get away with just returning + /// the start index. However, returning a full selection will give us + /// more flexibility in the future (e.g. if we want to support regex + /// searches or other more complex searches). It does cost us some memory, + /// but searches are expected to be relatively rare compared to normal + /// operations and can eat up some extra memory temporarily. pub fn next(self: *SlidingWindow) ?Selection { const slices = slices: { // If we have less data then the needle then we can't possibly match @@ -368,10 +376,14 @@ pub const SlidingWindow = struct { /// Add a new node to the sliding window. This will always grow /// the sliding window; data isn't pruned until it is consumed /// via a search (via next()). + /// + /// Returns the number of bytes of content added to the sliding window. + /// The total bytes will be larger since this omits metadata, but it is + /// an accurate measure of the text content size added. pub fn append( self: *SlidingWindow, node: *PageList.List.Node, - ) Allocator.Error!void { + ) Allocator.Error!usize { // Initialize our metadata for the node. var meta: Meta = .{ .node = node, @@ -422,6 +434,7 @@ pub const SlidingWindow = struct { try self.meta.append(meta); self.assertIntegrity(); + return written.len; } /// Only for tests! @@ -474,7 +487,7 @@ test "SlidingWindow single append" { // We want to test single-page cases. 
try testing.expect(s.pages.pages.first == s.pages.pages.last); const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); + _ = try w.append(node); // We should be able to find two matches. { @@ -517,7 +530,7 @@ test "SlidingWindow single append no match" { // We want to test single-page cases. try testing.expect(s.pages.pages.first == s.pages.pages.last); const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); + _ = try w.append(node); // No matches try testing.expect(w.next() == null); @@ -550,8 +563,8 @@ test "SlidingWindow two pages" { // Add both pages const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node.next.?); + _ = try w.append(node); + _ = try w.append(node.next.?); // Search should find two matches { @@ -602,8 +615,8 @@ test "SlidingWindow two pages match across boundary" { // Add both pages const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node.next.?); + _ = try w.append(node); + _ = try w.append(node.next.?); // Search should find a match { @@ -647,8 +660,8 @@ test "SlidingWindow two pages no match prunes first page" { // Add both pages const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node.next.?); + _ = try w.append(node); + _ = try w.append(node.next.?); // Search should find nothing try testing.expect(w.next() == null); @@ -688,8 +701,8 @@ test "SlidingWindow two pages no match keeps both pages" { // Add both pages const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node.next.?); + _ = try w.append(node); + _ = try w.append(node.next.?); // Search should find nothing try testing.expect(w.next() == null); @@ -717,8 +730,8 @@ test "SlidingWindow single append across circular buffer boundary" { // our implementation changes our test will fail. 
try testing.expect(s.pages.pages.first == s.pages.pages.last); const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node); + _ = try w.append(node); + _ = try w.append(node); { // No wrap around yet const slices = w.data.getPtrSlice(0, w.data.len()); @@ -734,7 +747,7 @@ test "SlidingWindow single append across circular buffer boundary" { w.testChangeNeedle("boo"); // Add new page, now wraps - try w.append(node); + _ = try w.append(node); { const slices = w.data.getPtrSlice(0, w.data.len()); try testing.expect(slices[0].len > 0); @@ -772,8 +785,8 @@ test "SlidingWindow single append match on boundary" { // our implementation changes our test will fail. try testing.expect(s.pages.pages.first == s.pages.pages.last); const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node); + _ = try w.append(node); + _ = try w.append(node); { // No wrap around yet const slices = w.data.getPtrSlice(0, w.data.len()); @@ -789,7 +802,7 @@ test "SlidingWindow single append match on boundary" { w.testChangeNeedle("boo!"); // Add new page, now wraps - try w.append(node); + _ = try w.append(node); { const slices = w.data.getPtrSlice(0, w.data.len()); try testing.expect(slices[0].len > 0); @@ -823,7 +836,7 @@ test "SlidingWindow single append reversed" { // We want to test single-page cases. try testing.expect(s.pages.pages.first == s.pages.pages.last); const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); + _ = try w.append(node); // We should be able to find two matches. { @@ -866,7 +879,7 @@ test "SlidingWindow single append no match reversed" { // We want to test single-page cases. 
try testing.expect(s.pages.pages.first == s.pages.pages.last); const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); + _ = try w.append(node); // No matches try testing.expect(w.next() == null); @@ -899,8 +912,8 @@ test "SlidingWindow two pages reversed" { // Add both pages in reverse order const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node.next.?); - try w.append(node); + _ = try w.append(node.next.?); + _ = try w.append(node); // Search should find two matches (in reverse order) { @@ -951,8 +964,8 @@ test "SlidingWindow two pages match across boundary reversed" { // Add both pages in reverse order const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node.next.?); - try w.append(node); + _ = try w.append(node.next.?); + _ = try w.append(node); // Search should find a match { @@ -997,8 +1010,8 @@ test "SlidingWindow two pages no match prunes first page reversed" { // Add both pages in reverse order const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node.next.?); - try w.append(node); + _ = try w.append(node.next.?); + _ = try w.append(node); // Search should find nothing try testing.expect(w.next() == null); @@ -1038,8 +1051,8 @@ test "SlidingWindow two pages no match keeps both pages reversed" { // Add both pages in reverse order const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node.next.?); - try w.append(node); + _ = try w.append(node.next.?); + _ = try w.append(node); // Search should find nothing try testing.expect(w.next() == null); @@ -1067,8 +1080,8 @@ test "SlidingWindow single append across circular buffer boundary reversed" { // our implementation changes our test will fail. 
try testing.expect(s.pages.pages.first == s.pages.pages.last); const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node); + _ = try w.append(node); + _ = try w.append(node); { // No wrap around yet const slices = w.data.getPtrSlice(0, w.data.len()); @@ -1085,7 +1098,7 @@ test "SlidingWindow single append across circular buffer boundary reversed" { w.testChangeNeedle("oob"); // Add new page, now wraps - try w.append(node); + _ = try w.append(node); { const slices = w.data.getPtrSlice(0, w.data.len()); try testing.expect(slices[0].len > 0); @@ -1123,8 +1136,8 @@ test "SlidingWindow single append match on boundary reversed" { // our implementation changes our test will fail. try testing.expect(s.pages.pages.first == s.pages.pages.last); const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(node); - try w.append(node); + _ = try w.append(node); + _ = try w.append(node); { // No wrap around yet const slices = w.data.getPtrSlice(0, w.data.len()); @@ -1141,7 +1154,7 @@ test "SlidingWindow single append match on boundary reversed" { w.testChangeNeedle("!oob"); // Add new page, now wraps - try w.append(node); + _ = try w.append(node); { const slices = w.data.getPtrSlice(0, w.data.len()); try testing.expect(slices[0].len > 0); From 22496b8f0e95ffcd7af4263b33a5d566456e9793 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Thu, 13 Nov 2025 10:07:08 -0800 Subject: [PATCH 06/10] terminal: sliding window needs to handle hard-wraps properly (tested) --- src/terminal/formatter.zig | 2 +- src/terminal/search/sliding_window.zig | 91 ++++++++++++++++++++++++-- 2 files changed, 88 insertions(+), 5 deletions(-) diff --git a/src/terminal/formatter.zig b/src/terminal/formatter.zig index 46cc971c8..6683b3453 100644 --- a/src/terminal/formatter.zig +++ b/src/terminal/formatter.zig @@ -849,7 +849,7 @@ pub const PageFormatter = struct { /// Initializes a page formatter. 
Other options can be set directly on the /// struct after initialization and before calling `format()`. pub fn init(page: *const Page, opts: Options) PageFormatter { - return PageFormatter{ + return .{ .page = page, .opts = opts, .start_x = 0, diff --git a/src/terminal/search/sliding_window.zig b/src/terminal/search/sliding_window.zig index 29c612691..4a2c3eb7d 100644 --- a/src/terminal/search/sliding_window.zig +++ b/src/terminal/search/sliding_window.zig @@ -414,6 +414,20 @@ pub const SlidingWindow = struct { }; assert(meta.cell_map.items.len == encoded.written().len); + // If the node we're adding isn't soft-wrapped, we add the + // trailing newline. + const row = node.data.getRow(node.data.size.rows - 1); + if (!row.wrap) { + encoded.writer.writeByte('\n') catch return error.OutOfMemory; + try meta.cell_map.append( + self.alloc, + meta.cell_map.getLastOrNull() orelse .{ + .x = 0, + .y = 0, + }, + ); + } + // Get our written data. If we're doing a reverse search then we // need to reverse all our encodings. const written = encoded.written(); @@ -637,6 +651,69 @@ test "SlidingWindow two pages match across boundary" { try testing.expectEqual(2, w.meta.len()); } +test "SlidingWindow two pages no match across boundary with newline" { + const testing = std.testing; + const alloc = testing.allocator; + + var w: SlidingWindow = try .init(alloc, .forward, "hello, world"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "hell"
+ const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("hell"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\no, world!"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + _ = try w.append(node); + _ = try w.append(node.next.?); + + // Search should NOT find a match + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); + + // We shouldn't prune because we don't have enough space + try testing.expectEqual(2, w.meta.len()); +} + +test "SlidingWindow two pages no match across boundary with newline reverse" { + const testing = std.testing; + const alloc = testing.allocator; + + var w: SlidingWindow = try .init(alloc, .reverse, "hello, world"); + defer w.deinit(); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "hell"
+ const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("hell"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\no, world!"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + + // Add both pages in reverse order + const node: *PageList.List.Node = s.pages.pages.first.?; + _ = try w.append(node.next.?); + _ = try w.append(node); + + // Search should NOT find a match + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); +} + test "SlidingWindow two pages no match prunes first page" { const testing = std.testing; const alloc = testing.allocator; @@ -778,13 +855,16 @@ test "SlidingWindow single append match on boundary" { defer s.deinit(); try s.testWriteString("o!XXXXXXXXXXXXXXXXXXXbo"); + // We need to surgically modify the last row to be soft-wrapped + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + node.data.getRow(node.data.size.rows - 1).wrap = true; + // We are trying to break a circular buffer boundary so the way we // do this is to duplicate the data then do a failing search. This // will cause the first page to be pruned. The next time we append we'll // put it in the middle of the circ buffer. We assert this so that if // our implementation changes our test will fail. 
- try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; _ = try w.append(node); _ = try w.append(node); { @@ -1129,13 +1209,16 @@ test "SlidingWindow single append match on boundary reversed" { defer s.deinit(); try s.testWriteString("o!XXXXXXXXXXXXXXXXXXXbo"); + // We need to surgically modify the last row to be soft-wrapped + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + node.data.getRow(node.data.size.rows - 1).wrap = true; + // We are trying to break a circular buffer boundary so the way we // do this is to duplicate the data then do a failing search. This // will cause the first page to be pruned. The next time we append we'll // put it in the middle of the circ buffer. We assert this so that if // our implementation changes our test will fail. - try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; _ = try w.append(node); _ = try w.append(node); { From 2b647ba4cb94ff4be9c1500991c15436ae9634c5 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Thu, 13 Nov 2025 09:25:27 -0800 Subject: [PATCH 07/10] terminal: PageListSearch updated to split next and feed --- src/terminal/search/pagelist.zig | 392 +++++++++++++++++++++++++++---- src/terminal/search/screen.zig | 33 +++ 2 files changed, 376 insertions(+), 49 deletions(-) create mode 100644 src/terminal/search/screen.zig diff --git a/src/terminal/search/pagelist.zig b/src/terminal/search/pagelist.zig index cb9d0ee45..b1ad88e81 100644 --- a/src/terminal/search/pagelist.zig +++ b/src/terminal/search/pagelist.zig @@ -1,29 +1,7 @@ -//! Search functionality for the terminal. -//! -//! At the time of writing this comment, this is a **work in progress**. -//! -//! Search at the time of writing is implemented using a simple -//! boyer-moore-horspool algorithm. The suboptimal part of the implementation -//! 
is that we need to encode each terminal page into a text buffer in order -//! to apply BMH to it. This is because the terminal page is not laid out -//! in a flat text form. -//! -//! To minimize memory usage, we use a sliding window to search for the -//! needle. The sliding window only keeps the minimum amount of page data -//! in memory to search for a needle (i.e. `needle.len - 1` bytes of overlap -//! between terminal pages). -//! -//! Future work: -//! -//! - PageListSearch on a PageList concurrently with another thread -//! - Handle pruned pages in a PageList to ensure we don't keep references -//! - Repeat search a changing active area of the screen -//! - Reverse search so that more recent matches are found first -//! - const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; +const testing = std.testing; const CircBuf = @import("../../datastruct/main.zig").CircBuf; const terminal = @import("../main.zig"); const point = terminal.point; @@ -33,13 +11,24 @@ const Pin = PageList.Pin; const Selection = terminal.Selection; const Screen = terminal.Screen; const PageFormatter = @import("../formatter.zig").PageFormatter; +const Terminal = @import("../Terminal.zig"); const SlidingWindow = @import("sliding_window.zig").SlidingWindow; /// Searches for a term in a PageList structure. /// -/// At the time of writing, this does not support searching a pagelist -/// simultaneously as its being used by another thread. This will be resolved -/// in the future. +/// This searches in reverse order starting from the given node. +/// +/// This assumes that nodes do not change contents. For nodes that change +/// contents, look at ActiveSearch, which is designed to re-search the active +/// area since it is assumed to change. When integrating ActiveSearch with +/// PageListSearch, the caller should start the PageListSearch from the +/// returned node from ActiveSearch.update().
+/// +/// Concurrent access to a PageList or nodes in a PageList is not allowed, +/// so the caller should ensure that necessary locks are held. Each function +/// documents whether it accesses the PageList or not. For example, you can +/// safely call `next()` without holding a lock, but you must hold a lock +/// while calling `feed()`. pub const PageListSearch = struct { /// The list we're searching. list: *PageList, @@ -47,50 +36,355 @@ pub const PageListSearch = struct { /// The sliding window of page contents and nodes to search. window: SlidingWindow, + /// The tracked pin for our current position in the pagelist. This + /// will always point to the CURRENT node we're searching from so that + /// we can track if we move. + pin: *Pin, + /// Initialize the page list search. The needle is copied so it can /// be freed immediately. + /// + /// Accesses the PageList/Node so the caller must ensure it is safe + /// to do so if there is any concurrent access. pub fn init( alloc: Allocator, - list: *PageList, needle: []const u8, + list: *PageList, + start: *PageList.List.Node, ) Allocator.Error!PageListSearch { - var window: SlidingWindow = try .init(alloc, .forward, needle); + // We put a tracked pin into the node that we're starting from. + // By using a tracked pin, we can keep our pagelist references safe + // because if the pagelist prunes pages, the tracked pin will + // be moved somewhere safe. + const pin = try list.trackPin(.{ + .node = start, + .y = start.data.size.rows - 1, + .x = start.data.size.cols - 1, + }); + errdefer list.untrackPin(pin); + + // Create our sliding window we'll use for searching. + var window: SlidingWindow = try .init(alloc, .reverse, needle); errdefer window.deinit(); + // We always feed our initial page data into the window, because + // we have the lock anyways and this lets our `pin` point to our + // current node and feed to work properly.
+ _ = try window.append(start); + return .{ .list = list, .window = window, + .pin = pin, }; } + /// Modifies the PageList (to untrack a pin) so the caller must ensure + /// that it is safe to do so. pub fn deinit(self: *PageListSearch) void { self.window.deinit(); + self.list.untrackPin(self.pin); } - /// Find the next match for the needle in the pagelist. This returns - /// null when there are no more matches. - pub fn next(self: *PageListSearch) Allocator.Error!?Selection { - // Try to search for the needle in the window. If we find a match - // then we can return that and we're done. - if (self.window.next()) |sel| return sel; + /// Return the next match in the loaded page nodes. If this returns + /// null then the PageList search needs to be fed the next node(s). + /// Call `feed` to do this. + /// + /// Beware that the selection returned may point to a node that + /// is freed if the caller does not hold necessary locks on the + /// PageList while searching. The pins should be validated prior to + /// final use. + /// + /// This does NOT access the PageList, so it can be called without + /// a lock held. + pub fn next(self: *PageListSearch) ?Selection { + return self.window.next(); + } - // Get our next node. If we have a value in our window then we - // can determine the next node. If we don't, we've never setup the - // window so we use our first node. - var node_: ?*PageList.List.Node = if (self.window.meta.last()) |meta| - meta.node.next - else - self.list.pages.first; + /// Feed more data to the sliding window from the pagelist. This will + /// feed enough data to cover at least one match (needle length) if it + /// exists; this doesn't perform the search, it only feeds data. + /// + /// This accesses nodes in the PageList, so the caller must ensure + /// it is safe to do so (i.e. hold necessary locks). + /// + /// This returns false if there is no more data to feed. This essentially + /// means we've searched the entire pagelist.
+ pub fn feed(self: *PageListSearch) Allocator.Error!bool { + // Add at least enough data to find a single match. + var rem = self.window.needle.len; - // Add one pagelist node at a time, look for matches, and repeat - // until we find a match or we reach the end of the pagelist. - // This append then next pattern limits memory usage of the window. - while (node_) |node| : (node_ = node.next) { - try self.window.append(node); - if (self.window.next()) |sel| return sel; + // Start at our previous node and then continue adding until we + // get our desired amount of data. + var node_: ?*PageList.List.Node = self.pin.node.prev; + while (node_) |node| : (node_ = node.prev) { + rem -|= try self.window.append(node); + + // Move our tracked pin to the new node. + self.pin.node = node; + + if (rem == 0) break; } - // We've reached the end of the pagelist, no matches. - return null; + // True if we fed any data. + return rem < self.window.needle.len; } }; + +test "simple search" { + const alloc = testing.allocator; + var t: Terminal = try .init(alloc, .{ .cols = 10, .rows = 10 }); + defer t.deinit(alloc); + + var s = t.vtStream(); + defer s.deinit(); + try s.nextSlice("Fizz\r\nBuzz\r\nFizz\r\nBang"); + + var search: PageListSearch = try .init( + alloc, + "Fizz", + &t.screen.pages, + t.screen.pages.pages.last.?, + ); + defer search.deinit(); + + { + const sel = search.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 0, + .y = 2, + } }, t.screen.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 3, + .y = 2, + } }, t.screen.pages.pointFromPin(.active, sel.end()).?); + } + { + const sel = search.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 0, + .y = 0, + } }, t.screen.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 3, + .y = 0, + } }, t.screen.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(search.next() == 
null); + + // We should not be able to feed since we have one page + try testing.expect(!try search.feed()); +} + +test "feed multiple pages with matches" { + const alloc = testing.allocator; + var t: Terminal = try .init(alloc, .{ .cols = 10, .rows = 10 }); + defer t.deinit(alloc); + + var s = t.vtStream(); + defer s.deinit(); + + // Fill up first page + const first_page_rows = t.screen.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.nextSlice("\r\n"); + try s.nextSlice("Fizz"); + try testing.expect(t.screen.pages.pages.first == t.screen.pages.pages.last); + + // Create second page + try s.nextSlice("\r\n"); + try testing.expect(t.screen.pages.pages.first != t.screen.pages.pages.last); + try s.nextSlice("Buzz\r\nFizz"); + + var search: PageListSearch = try .init( + alloc, + "Fizz", + &t.screen.pages, + t.screen.pages.pages.last.?, + ); + defer search.deinit(); + + // First match on the last page + const sel1 = search.next(); + try testing.expect(sel1 != null); + try testing.expect(search.next() == null); + + // Feed should succeed and load the first page + try testing.expect(try search.feed()); + + // Now we should find the match on the first page + const sel2 = search.next(); + try testing.expect(sel2 != null); + try testing.expect(search.next() == null); + + // No more pages to feed + try testing.expect(!try search.feed()); +} + +test "feed multiple pages no matches" { + const alloc = testing.allocator; + var t: Terminal = try .init(alloc, .{ .cols = 10, .rows = 10 }); + defer t.deinit(alloc); + + var s = t.vtStream(); + defer s.deinit(); + + // Fill up first page + const first_page_rows = t.screen.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.nextSlice("\r\n"); + try s.nextSlice("Hello"); + + // Create second page + try s.nextSlice("\r\n"); + try testing.expect(t.screen.pages.pages.first != t.screen.pages.pages.last); + try s.nextSlice("World"); + + var search: PageListSearch = try .init( + 
alloc, + "Nope", + &t.screen.pages, + t.screen.pages.pages.last.?, + ); + defer search.deinit(); + + // No matches on last page + try testing.expect(search.next() == null); + + // Feed first page + try testing.expect(try search.feed()); + + // Still no matches + try testing.expect(search.next() == null); + + // No more pages + try testing.expect(!try search.feed()); +} + +test "feed iteratively through multiple matches" { + const alloc = testing.allocator; + var t: Terminal = try .init(alloc, .{ .cols = 80, .rows = 24 }); + defer t.deinit(alloc); + + var s = t.vtStream(); + defer s.deinit(); + + const first_page_rows = t.screen.pages.pages.first.?.data.capacity.rows; + + // Fill first page with a match at the end + for (0..first_page_rows - 1) |_| try s.nextSlice("\r\n"); + try s.nextSlice("Page1Test"); + try testing.expect(t.screen.pages.pages.first == t.screen.pages.pages.last); + + // Create second page with a match + try s.nextSlice("\r\n"); + try testing.expect(t.screen.pages.pages.first != t.screen.pages.pages.last); + try s.nextSlice("Page2Test"); + + var search: PageListSearch = try .init( + alloc, + "Test", + &t.screen.pages, + t.screen.pages.pages.last.?, + ); + defer search.deinit(); + + // Match on page 2 + try testing.expect(search.next() != null); + try testing.expect(search.next() == null); + + // Feed page 1 + try testing.expect(try search.feed()); + try testing.expect(search.next() != null); + try testing.expect(search.next() == null); + + // No more pages + try testing.expect(!try search.feed()); +} + +test "feed with match spanning page boundary" { + const alloc = testing.allocator; + var t: Terminal = try .init(alloc, .{ .cols = 80, .rows = 24 }); + defer t.deinit(alloc); + + var s = t.vtStream(); + defer s.deinit(); + + const first_page_rows = t.screen.pages.pages.first.?.data.capacity.rows; + + // Fill first page ending with "Te" + for (0..first_page_rows - 1) |_| try s.nextSlice("\r\n"); + for (0..t.screen.pages.cols - 2) |_| try 
s.nextSlice("x"); + try s.nextSlice("Te"); + try testing.expect(t.screen.pages.pages.first == t.screen.pages.pages.last); + + // Second page starts with "st" + try s.nextSlice("st"); + try testing.expect(t.screen.pages.pages.first != t.screen.pages.pages.last); + + var search: PageListSearch = try .init( + alloc, + "Test", + &t.screen.pages, + t.screen.pages.pages.last.?, + ); + defer search.deinit(); + + // No complete match on last page alone (only has "st") + try testing.expect(search.next() == null); + + // Feed first page - this should give us enough data to find "Test" + try testing.expect(try search.feed()); + + // Should find the spanning match + const sel = search.next().?; + try testing.expect(sel.start().node != sel.end().node); + { + const str = try t.screen.selectionString( + alloc, + .{ .sel = sel }, + ); + defer alloc.free(str); + try testing.expectEqualStrings(str, "Test"); + } + + // No more matches + try testing.expect(search.next() == null); + + // No more pages + try testing.expect(!try search.feed()); +} + +test "feed with match spanning page boundary with newline" { + const alloc = testing.allocator; + var t: Terminal = try .init(alloc, .{ .cols = 80, .rows = 24 }); + defer t.deinit(alloc); + + var s = t.vtStream(); + defer s.deinit(); + + const first_page_rows = t.screen.pages.pages.first.?.data.capacity.rows; + + // Fill first page ending with "Te" + for (0..first_page_rows - 1) |_| try s.nextSlice("\r\n"); + for (0..t.screen.pages.cols - 2) |_| try s.nextSlice("x"); + try s.nextSlice("Te"); + try testing.expect(t.screen.pages.pages.first == t.screen.pages.pages.last); + + // Second page starts with "st" + try s.nextSlice("\r\n"); + try testing.expect(t.screen.pages.pages.first != t.screen.pages.pages.last); + try s.nextSlice("st"); + + var search: PageListSearch = try .init( + alloc, + "Test", + &t.screen.pages, + t.screen.pages.pages.last.?, + ); + defer search.deinit(); + + // Should not find any matches since we broke with an explicit 
newline. + try testing.expect(search.next() == null); + try testing.expect(try search.feed()); + try testing.expect(search.next() == null); + try testing.expect(!try search.feed()); +} diff --git a/src/terminal/search/screen.zig b/src/terminal/search/screen.zig new file mode 100644 index 000000000..036b5813e --- /dev/null +++ b/src/terminal/search/screen.zig @@ -0,0 +1,33 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const Screen = @import("../Screen.zig"); +const Active = @import("active.zig").ActiveSearch; + +pub const ScreenSearch = struct { + /// The active area search state + active: Active, + + /// Search state machine + const State = enum { + /// Currently searching the active area + active, + }; + + pub fn init( + alloc: Allocator, + screen: *const Screen, + needle: []const u8, + ) Allocator.Error!ScreenSearch { + _ = screen; + + // Setup our active area search + var active: Active = try .init(alloc, needle); + errdefer active.deinit(); + + // Store our screen + + return .{ + .active = active, + }; + } +}; From 7b26e6319e232bab7685ae235de1e290ba625795 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Thu, 13 Nov 2025 12:58:30 -0800 Subject: [PATCH 08/10] terminal: Pin.garbage tracking --- src/terminal/PageList.zig | 49 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/src/terminal/PageList.zig b/src/terminal/PageList.zig index 82c64591b..aa5e31908 100644 --- a/src/terminal/PageList.zig +++ b/src/terminal/PageList.zig @@ -371,6 +371,9 @@ fn verifyIntegrity(self: *const PageList) IntegrityError!void { if (comptime !build_options.slow_runtime_safety) return; if (self.pause_integrity_checks > 0) return; + // Our viewport pin should never be garbage + assert(!self.viewport_pin.garbage); + // Verify that our cached total_rows matches the actual row count const actual_total = self.totalRows(); if (actual_total != self.total_rows) { @@ -528,6 +531,8 @@ pub fn reset(self: *PageList) 
void { self.total_rows = self.rows; // Update all our tracked pins to point to our first page top-left + // and mark them as garbage, because it got mangled in a way where + // semantically it really doesn't make sense. { var it = self.tracked_pins.iterator(); while (it.next()) |entry| { @@ -535,7 +540,11 @@ pub fn reset(self: *PageList) void { p.node = self.pages.first.?; p.x = 0; p.y = 0; + p.garbage = true; } + + // Our viewport pin is never garbage + self.viewport_pin.garbage = false; } // Move our viewport back to the active area since everything is gone. @@ -2428,7 +2437,9 @@ pub fn grow(self: *PageList) !?*List.Node { p.node = self.pages.first.?; p.y = 0; p.x = 0; + p.garbage = true; } + self.viewport_pin.garbage = false; // In this case we do NOT need to update page_size because // we're reusing an existing page so nothing has changed. @@ -3047,13 +3058,16 @@ pub fn eraseRows( fn erasePage(self: *PageList, node: *List.Node) void { assert(node.next != null or node.prev != null); - // Update any tracked pins to move to the next page. + // Update any tracked pins to move to the previous or next page. const pin_keys = self.tracked_pins.keys(); for (pin_keys) |p| { if (p.node != node) continue; - p.node = node.next orelse node.prev orelse unreachable; + p.node = node.prev orelse node.next orelse unreachable; p.y = 0; p.x = 0; + + // This doesn't get marked garbage because the tracked pin + // movement is sensical. } // Remove the page from the linked list @@ -3903,6 +3917,13 @@ pub const Pin = struct { y: size.CellCountInt = 0, x: size.CellCountInt = 0, + /// This is flipped to true for tracked pins that were tracking + /// a page that got pruned for any reason and where the tracked pin + /// couldn't be moved to a sensical location. Users of the tracked + /// pin could use this data and make their own determination of + /// semantics. 
+ garbage: bool = false, + pub inline fn rowAndCell(self: Pin) struct { row: *pagepkg.Row, cell: *pagepkg.Cell, @@ -5757,6 +5778,7 @@ test "PageList grow prune scrollback" { try testing.expect(p.node == s.pages.first.?); try testing.expect(p.x == 0); try testing.expect(p.y == 0); + try testing.expect(p.garbage); // Verify the viewport offset cache was invalidated. After pruning, // the offset should have changed because we removed rows from @@ -10641,6 +10663,29 @@ test "PageList reset across two pages" { try testing.expectEqual(@as(usize, s.rows), s.totalRows()); } +test "PageList reset moves tracked pins and marks them as garbage" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try init(alloc, 80, 24, null); + defer s.deinit(); + + // Create a tracked pin into the active area + const p = try s.trackPin(s.pin(.{ .active = .{ + .x = 42, + .y = 12, + } }).?); + defer s.untrackPin(p); + + s.reset(); + + // Our added pin should now be garbage + try testing.expect(p.garbage); + + // Viewport pin should not be garbage because it makes sense. + try testing.expect(!s.viewport_pin.garbage); +} + test "PageList clears history" { const testing = std.testing; const alloc = testing.allocator; From d349cc8932f4ffffbf7680faa9c58ca94bbe8ce6 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Thu, 13 Nov 2025 11:50:35 -0800 Subject: [PATCH 09/10] terminal: ScreenSearch to search a single terminal screen --- src/terminal/PageList.zig | 5 +- src/terminal/search.zig | 1 + src/terminal/search/active.zig | 28 +- src/terminal/search/screen.zig | 607 ++++++++++++++++++++++++++++++++- 4 files changed, 614 insertions(+), 27 deletions(-) diff --git a/src/terminal/PageList.zig b/src/terminal/PageList.zig index aa5e31908..a589af179 100644 --- a/src/terminal/PageList.zig +++ b/src/terminal/PageList.zig @@ -3858,8 +3858,9 @@ fn totalRows(self: *const PageList) usize { return rows; } -/// The total number of pages in this list. 
-fn totalPages(self: *const PageList) usize { +/// The total number of pages in this list. This should only be used +/// for tests since it is O(N) over the list of pages. +pub fn totalPages(self: *const PageList) usize { var pages: usize = 0; var node_ = self.pages.first; while (node_) |node| { diff --git a/src/terminal/search.zig b/src/terminal/search.zig index 724b5c171..510aac980 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -2,6 +2,7 @@ pub const Active = @import("search/active.zig").ActiveSearch; pub const PageList = @import("search/pagelist.zig").PageListSearch; +pub const Screen = @import("search/screen.zig").ScreenSearch; pub const Thread = @import("search/Thread.zig"); test { diff --git a/src/terminal/search/active.zig b/src/terminal/search/active.zig index b682c6df3..d05417747 100644 --- a/src/terminal/search/active.zig +++ b/src/terminal/search/active.zig @@ -42,10 +42,10 @@ pub const ActiveSearch = struct { /// to perform the search later. This lets the caller hold the lock /// on the PageList for a minimal amount of time. /// - /// This returns the first page (in reverse order) NOT searched by - /// this active area. This is useful for callers that want to follow up - /// with populating the scrollback searcher. The scrollback searcher - /// should start searching from the returned page backwards. + /// This returns the first page (in reverse order) covered by this + /// search. This allows the history search to overlap and search history. + /// There CAN BE duplicates, and this page CAN BE mutable, so the history + /// search results should prune anything that's in the active area. /// /// If the return value is null it means the active area covers the entire /// PageList, currently. @@ -59,8 +59,10 @@ pub const ActiveSearch = struct { // First up, add enough pages to cover the active area. 
var rem: usize = list.rows; var node_ = list.pages.last; + var last_node: ?*PageList.List.Node = null; while (node_) |node| : (node_ = node.prev) { _ = try self.window.append(node); + last_node = node; // If we reached our target amount, then this is the last // page that contains the active area. We go to the previous @@ -76,18 +78,20 @@ pub const ActiveSearch = struct { // Next, add enough overlap to cover needle.len - 1 bytes (if it // exists) so we can cover the overlap. - rem = self.window.needle.len - 1; while (node_) |node| : (node_ = node.prev) { + // If the last row of this node isn't wrapped we can't overlap. + const row = node.data.getRow(node.data.size.rows - 1); + if (!row.wrap) break; + + // We could be more accurate here and count bytes since the + // last wrap but its complicated and unlikely multiple pages + // wrap so this should be fine. const added = try self.window.append(node); - if (added >= rem) { - node_ = node.prev; - break; - } - rem -= added; + if (added >= self.window.needle.len - 1) break; } - // Return the first page NOT covered by the active area. - return node_; + // Return the last node we added to our window. + return last_node; } /// Find the next match for the needle in the active area. 
This returns diff --git a/src/terminal/search/screen.zig b/src/terminal/search/screen.zig index 036b5813e..e291f3c2e 100644 --- a/src/terminal/search/screen.zig +++ b/src/terminal/search/screen.zig @@ -1,33 +1,614 @@ const std = @import("std"); +const assert = std.debug.assert; +const testing = std.testing; const Allocator = std.mem.Allocator; +const point = @import("../point.zig"); +const PageList = @import("../PageList.zig"); +const Pin = PageList.Pin; const Screen = @import("../Screen.zig"); -const Active = @import("active.zig").ActiveSearch; +const Selection = @import("../Selection.zig"); +const Terminal = @import("../Terminal.zig"); +const ActiveSearch = @import("active.zig").ActiveSearch; +const PageListSearch = @import("pagelist.zig").PageListSearch; +const SlidingWindow = @import("sliding_window.zig").SlidingWindow; +/// Searches for a needle within a Screen, handling active area updates, +/// pages being pruned from the screen (e.g. scrollback limits), and more. +/// +/// Unlike our lower-level searchers (like ActiveSearch and PageListSearch), +/// this will cache and store all search results so the caller can re-access +/// them as needed. This structure does this because it is intended to help +/// the caller handle the case where the Screen is changing while the user +/// is searching. +/// +/// An inactive screen can continue to be searched in the background, and when +/// screen state changes, the renderer/caller can access the existing search +/// results without needing to re-search everything. This prevents a particularly +/// nasty UX where going to alt screen (e.g. neovim) and then back would +/// restart the full scrollback search. pub const ScreenSearch = struct { + /// The screen being searched. + screen: *Screen, + /// The active area search state - active: Active, + active: ActiveSearch, + + /// The history (scrollback) search state. May be null if there is + /// no history yet. 
+ history: ?HistorySearch, + + /// Current state of the search, a state machine. + state: State, + + /// The results found so far. These are stored separately because history + /// is mostly immutable once found, while active area results may + /// change. This lets us easily reset the active area results for a + /// re-search scenario. + history_results: std.ArrayList(Selection), + active_results: std.ArrayList(Selection), + + /// History search state. + const HistorySearch = struct { + /// The actual searcher state. + searcher: PageListSearch, + + /// The pin for the first node that this searcher is searching from. + /// We use this when the active area changes to find the diff between + /// the top of the new active area and the previous start point + /// to determine if we need to search more history. + start_pin: *Pin, + + pub fn deinit(self: *HistorySearch, screen: *Screen) void { + self.searcher.deinit(); + screen.pages.untrackPin(self.start_pin); + } + }; /// Search state machine const State = enum { /// Currently searching the active area active, + + /// Currently searching the history area + history, + + /// History search is waiting for more data to be fed before + /// it can progress. + history_feed, + + /// Search is complete given the current terminal state. + complete, }; + // Initialize a screen search for the given screen and needle. 
pub fn init( alloc: Allocator, - screen: *const Screen, + screen: *Screen, needle: []const u8, ) Allocator.Error!ScreenSearch { - _ = screen; - - // Setup our active area search - var active: Active = try .init(alloc, needle); - errdefer active.deinit(); - - // Store our screen - - return .{ - .active = active, + var result: ScreenSearch = .{ + .screen = screen, + .active = try .init(alloc, needle), + .history = null, + .state = .active, + .active_results = .empty, + .history_results = .empty, }; + errdefer result.deinit(); + + // Update our initial active area state + try result.reloadActive(); + + return result; + } + + pub fn deinit(self: *ScreenSearch) void { + const alloc = self.allocator(); + self.active.deinit(); + if (self.history) |*h| h.deinit(self.screen); + self.active_results.deinit(alloc); + self.history_results.deinit(alloc); + } + + fn allocator(self: *ScreenSearch) Allocator { + return self.active.window.alloc; + } + + pub const TickError = Allocator.Error || error{ + FeedRequired, + SearchComplete, + }; + + /// Returns all matches as an owned slice (caller must free). + /// The matches are ordered from most recent to oldest (e.g. bottom + /// of the screen to top of the screen). + /// + /// This handles pruning overlapping results between active area + /// and the history area so you should use this instead of accessing + /// the result slices directly. + pub fn matches( + self: *ScreenSearch, + alloc: Allocator, + ) Allocator.Error![]Selection { + const active_results = self.active_results.items; + const history_results: []const Selection = if (self.history) |*h| history_results: { + // We prune all the history results that start in our first + // history page because the active area will overlap and + // get that. + for (self.history_results.items, 0..) 
|sel, i| { + if (sel.start().node != h.start_pin.node) { + break :history_results self.history_results.items[i..]; + } + } + + break :history_results &.{}; + } else &.{}; + + const results = try alloc.alloc( + Selection, + active_results.len + history_results.len, + ); + errdefer alloc.free(results); + + // Active does a forward search, so we add the active results then + // reverse them. There are usually not many active results so this + // is fast enough compared to adding them in reverse order. + assert(self.active.window.direction == .forward); + @memcpy( + results[0..active_results.len], + active_results, + ); + std.mem.reverse(Selection, results[0..active_results.len]); + + // History does a backward search, so we can just append them + // after. + @memcpy( + results[active_results.len..], + history_results, + ); + + return results; + } + + /// Search the full screen state. This will block until the search + /// is complete. For performance, it is recommended to use `tick` and + /// `feed` to incrementally make progress on the search instead. + pub fn searchAll(self: *ScreenSearch) Allocator.Error!void { + while (true) { + self.tick() catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.FeedRequired => try self.feed(), + error.SearchComplete => return, + }; + } + } + + /// Make incremental progress on the search without accessing any + /// screen state (so no lock is required). + /// + /// This will return error.FeedRequired if the search cannot make progress + /// without being fed more data. In this case, the caller should call + /// the `feed` function to provide more data to the searcher. + /// + /// This will return error.SearchComplete if the search is fully complete. + /// This is to signal to the caller that it can move to a more efficient + /// sleep/wait state until there is more work to do (e.g. new data to feed). 
+ pub fn tick(self: *ScreenSearch) TickError!void { + switch (self.state) { + .active => try self.tickActive(), + .history => try self.tickHistory(), + .history_feed => return error.FeedRequired, + .complete => return error.SearchComplete, + } + } + + /// Feed more data to the searcher so it can continue searching. This + /// accesses the screen state, so the caller must hold the necessary locks. + pub fn feed(self: *ScreenSearch) Allocator.Error!void { + const history: *PageListSearch = if (self.history) |*h| &h.searcher else { + // No history to feed, search is complete. + self.state = .complete; + return; + }; + + // Future: we may want to feed multiple pages at once here to + // lower the frequency of lock acquisitions. + if (!try history.feed()) { + // No more data to feed, search is complete. + self.state = .complete; + return; + } + + // Depending on our state handle where feed goes + switch (self.state) { + // If we're searching active or history, then feeding doesn't + // change the state. + .active, .history => {}, + + // Feed goes back to searching history. + .history_feed => self.state = .history, + + // If we're complete then the feed call above should always + // return false and we can't reach this. + .complete => unreachable, + } + } + + fn tickActive(self: *ScreenSearch) Allocator.Error!void { + // For the active area, we consume the entire search in one go + // because the active area is generally small. + const alloc = self.allocator(); + while (self.active.next()) |sel| { + // If this fails, then we miss a result since `active.next()` + // moves forward and prunes data. In the future, we may want + // to have some more robust error handling but the only + // scenario this would fail is OOM and we're probably in + // deeper trouble at that point anyways. + try self.active_results.append(alloc, sel); + } + + // We've consumed the entire active area, move to history. 
+ self.state = .history; + } + + fn tickHistory(self: *ScreenSearch) Allocator.Error!void { + const history: *PageListSearch = if (self.history) |*h| &h.searcher else { + // No history to search, we're done. + self.state = .complete; + return; + }; + + // Try to consume all the loaded matches in one go, because + // the search is generally fast for loaded data. + const alloc = self.allocator(); + while (history.next()) |sel| { + // Same note as tickActive for error handling. + try self.history_results.append(alloc, sel); + } + + // We need to be fed more data. + self.state = .history_feed; + } + + /// Reload the active area because it has changed. + /// + /// Since it is very fast, this will also do the full active area + /// search again, too. This avoids any complexity around the search + /// state machine. + /// + /// The caller must hold the necessary locks to access the screen state. + pub fn reloadActive(self: *ScreenSearch) Allocator.Error!void { + const list: *PageList = &self.screen.pages; + if (try self.active.update(list)) |history_node| history: { + // We need to account for any active area growth that would + // cause new pages to move into our history. If there are new + // pages then we need to re-search the pages and add it to + // our history results. + + const history_: ?*HistorySearch = if (self.history) |*h| state: { + // If our start pin became garbage, it means we pruned all + // the way up through it, so we have no history anymore. + // Reset our history state. + if (h.start_pin.garbage) { + h.deinit(self.screen); + self.history = null; + self.history_results.clearRetainingCapacity(); + break :state null; + } + + break :state h; + } else null; + + const history = history_ orelse { + // No history search yet, but we now have history. So let's + // initialize. 
+ + // Our usage of needle below depends on this + assert(self.active.window.direction == .forward); + + var search: PageListSearch = try .init( + self.allocator(), + self.active.window.needle, + list, + history_node, + ); + errdefer search.deinit(); + + const pin = try list.trackPin(.{ .node = history_node }); + errdefer list.untrackPin(pin); + + self.history = .{ + .searcher = search, + .start_pin = pin, + }; + + // We don't need to update any history since we had no history + // before, so we can break out of the whole conditional. + break :history; + }; + + if (history.start_pin.node == history_node) { + // No change in the starting node, we're done. + break :history; + } + + // We had prior history with a valid pin and our current + // starting history node doesn't match our previous. So there is + // a small delta (usually small) that we need to search and update + // our history results. + const old_node = history.start_pin.node; + + // Do a forward search from our prior node to this one. We + // collect all the results into a new list. We ASSUME that + // reloadActive is being called frequently enough that there isn't + // a massive amount of history to search here. + const alloc = self.allocator(); + var window: SlidingWindow = try .init( + alloc, + .forward, + self.active.window.needle, + ); + defer window.deinit(); + while (true) { + _ = try window.append(history.start_pin.node); + if (history.start_pin.node == history_node) break; + const next = history.start_pin.node.next orelse break; + history.start_pin.node = next; + } + assert(history.start_pin.node == history_node); + + var results: std.ArrayList(Selection) = try .initCapacity( + alloc, + self.history_results.items.len, + ); + errdefer results.deinit(alloc); + while (window.next()) |sel| try results.append( + alloc, + sel, + ); + + // If we have no matches then there is nothing to change + // in our history (fast path) + if (results.items.len == 0) break :history; + + // Matches! 
Reverse our list then append all the remaining + // history items that didn't start on our original node. + std.mem.reverse(Selection, results.items); + for (self.history_results.items, 0..) |sel, i| { + if (sel.start().node != old_node) { + try results.appendSlice(alloc, self.history_results.items[i..]); + break; + } + } + self.history_results.deinit(alloc); + self.history_results = results; + } + + // Reset our active search results and search again. + self.active_results.clearRetainingCapacity(); + switch (self.state) { + // If we're in the active state we run a normal tick so + // we can move into a better state. + .active => try self.tickActive(), + + // Otherwise, just tick it and move back to whatever state + // we were in. + else => { + const old_state = self.state; + defer self.state = old_state; + try self.tickActive(); + }, + } } }; + +test "simple search" { + const alloc = testing.allocator; + var t: Terminal = try .init(alloc, .{ .cols = 10, .rows = 2 }); + defer t.deinit(alloc); + + var s = t.vtStream(); + defer s.deinit(); + try s.nextSlice("Fizz\r\nBuzz\r\nFizz\r\nBang"); + + var search: ScreenSearch = try .init(alloc, &t.screen, "Fizz"); + defer search.deinit(); + try search.searchAll(); + try testing.expectEqual(2, search.active_results.items.len); + // We don't test history results since there is overlap + + // Get all matches + const matches = try search.matches(alloc); + defer alloc.free(matches); + try testing.expectEqual(2, matches.len); + + { + const sel = matches[0]; + try testing.expectEqual(point.Point{ .screen = .{ + .x = 0, + .y = 2, + } }, t.screen.pages.pointFromPin(.screen, sel.start()).?); + try testing.expectEqual(point.Point{ .screen = .{ + .x = 3, + .y = 2, + } }, t.screen.pages.pointFromPin(.screen, sel.end()).?); + } + { + const sel = matches[1]; + try testing.expectEqual(point.Point{ .screen = .{ + .x = 0, + .y = 0, + } }, t.screen.pages.pointFromPin(.screen, sel.start()).?); + try testing.expectEqual(point.Point{ .screen = .{ 
+ .x = 3, + .y = 0, + } }, t.screen.pages.pointFromPin(.screen, sel.end()).?); + } +} + +test "simple search with history" { + const alloc = testing.allocator; + var t: Terminal = try .init(alloc, .{ + .cols = 10, + .rows = 2, + .max_scrollback = std.math.maxInt(usize), + }); + defer t.deinit(alloc); + const list: *PageList = &t.screen.pages; + + var s = t.vtStream(); + defer s.deinit(); + + try s.nextSlice("Fizz\r\n"); + while (list.totalPages() < 3) try s.nextSlice("\r\n"); + for (0..list.rows) |_| try s.nextSlice("\r\n"); + try s.nextSlice("hello."); + + var search: ScreenSearch = try .init(alloc, &t.screen, "Fizz"); + defer search.deinit(); + try search.searchAll(); + try testing.expectEqual(0, search.active_results.items.len); + + // Get all matches + const matches = try search.matches(alloc); + defer alloc.free(matches); + try testing.expectEqual(1, matches.len); + + { + const sel = matches[0]; + try testing.expectEqual(point.Point{ .screen = .{ + .x = 0, + .y = 0, + } }, t.screen.pages.pointFromPin(.screen, sel.start()).?); + try testing.expectEqual(point.Point{ .screen = .{ + .x = 3, + .y = 0, + } }, t.screen.pages.pointFromPin(.screen, sel.end()).?); + } +} + +test "reload active with history change" { + const alloc = testing.allocator; + var t: Terminal = try .init(alloc, .{ + .cols = 10, + .rows = 2, + .max_scrollback = std.math.maxInt(usize), + }); + defer t.deinit(alloc); + const list: *PageList = &t.screen.pages; + + var s = t.vtStream(); + defer s.deinit(); + try s.nextSlice("Fizz\r\n"); + + // Start up our search which will populate our initial active area. + var search: ScreenSearch = try .init(alloc, &t.screen, "Fizz"); + defer search.deinit(); + try search.searchAll(); + { + const matches = try search.matches(alloc); + defer alloc.free(matches); + try testing.expectEqual(1, matches.len); + } + + // Grow into two pages so our history pin will move. 
+ while (list.totalPages() < 2) try s.nextSlice("\r\n"); + for (0..list.rows) |_| try s.nextSlice("\r\n"); + try s.nextSlice("2Fizz"); + + // Active area changed so reload + try search.reloadActive(); + try search.searchAll(); + + // Get all matches + { + const matches = try search.matches(alloc); + defer alloc.free(matches); + try testing.expectEqual(2, matches.len); + { + const sel = matches[1]; + try testing.expectEqual(point.Point{ .screen = .{ + .x = 0, + .y = 0, + } }, t.screen.pages.pointFromPin(.screen, sel.start()).?); + try testing.expectEqual(point.Point{ .screen = .{ + .x = 3, + .y = 0, + } }, t.screen.pages.pointFromPin(.screen, sel.end()).?); + } + { + const sel = matches[0]; + try testing.expectEqual(point.Point{ .active = .{ + .x = 1, + .y = 1, + } }, t.screen.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 4, + .y = 1, + } }, t.screen.pages.pointFromPin(.active, sel.end()).?); + } + } + + // Reset the screen which will make our pin garbage. 
+ t.fullReset(); + try s.nextSlice("WeFizzing"); + try search.reloadActive(); + try search.searchAll(); + + { + const matches = try search.matches(alloc); + defer alloc.free(matches); + try testing.expectEqual(1, matches.len); + { + const sel = matches[0]; + try testing.expectEqual(point.Point{ .active = .{ + .x = 2, + .y = 0, + } }, t.screen.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 5, + .y = 0, + } }, t.screen.pages.pointFromPin(.active, sel.end()).?); + } + } +} + +test "active change contents" { + const alloc = testing.allocator; + var t: Terminal = try .init(alloc, .{ .cols = 10, .rows = 5 }); + defer t.deinit(alloc); + + var s = t.vtStream(); + defer s.deinit(); + try s.nextSlice("Fuzz\r\nBuzz\r\nFizz\r\nBang"); + + var search: ScreenSearch = try .init(alloc, &t.screen, "Fizz"); + defer search.deinit(); + try search.searchAll(); + try testing.expectEqual(1, search.active_results.items.len); + + // Erase the screen, move our cursor to the top, and change contents. 
+ try s.nextSlice("\x1b[2J\x1b[H"); // Clear screen and move home + try s.nextSlice("Bang\r\nFizz\r\nHello!"); + + try search.reloadActive(); + try search.searchAll(); + try testing.expectEqual(1, search.active_results.items.len); + + // Get all matches + const matches = try search.matches(alloc); + defer alloc.free(matches); + try testing.expectEqual(1, matches.len); + + { + const sel = matches[0]; + try testing.expectEqual(point.Point{ .screen = .{ + .x = 0, + .y = 1, + } }, t.screen.pages.pointFromPin(.screen, sel.start()).?); + try testing.expectEqual(point.Point{ .screen = .{ + .x = 3, + .y = 1, + } }, t.screen.pages.pointFromPin(.screen, sel.end()).?); + } +} From 6b805a318eb506e338bf6816176d751e31fc58fb Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Fri, 14 Nov 2025 07:24:02 -0800 Subject: [PATCH 10/10] terminal: ScreenSearch can omit overlapped results in history tick --- src/terminal/search/screen.zig | 50 ++++++++++------------------------ 1 file changed, 15 insertions(+), 35 deletions(-) diff --git a/src/terminal/search/screen.zig b/src/terminal/search/screen.zig index e291f3c2e..0ffeb76c4 100644 --- a/src/terminal/search/screen.zig +++ b/src/terminal/search/screen.zig @@ -122,28 +122,12 @@ pub const ScreenSearch = struct { /// Returns all matches as an owned slice (caller must free). /// The matches are ordered from most recent to oldest (e.g. bottom /// of the screen to top of the screen). - /// - /// This handles pruning overlapping results between active area - /// and the history area so you should use this instead of accessing - /// the result slices directly. pub fn matches( self: *ScreenSearch, alloc: Allocator, ) Allocator.Error![]Selection { const active_results = self.active_results.items; - const history_results: []const Selection = if (self.history) |*h| history_results: { - // We prune all the history results that start in our first - // history page because the active area will overlap and - // get that. 
- for (self.history_results.items, 0..) |sel, i| { - if (sel.start().node != h.start_pin.node) { - break :history_results self.history_results.items[i..]; - } - } - - break :history_results &.{}; - } else &.{}; - + const history_results = self.history_results.items; const results = try alloc.alloc( Selection, active_results.len + history_results.len, @@ -252,7 +236,7 @@ pub const ScreenSearch = struct { } fn tickHistory(self: *ScreenSearch) Allocator.Error!void { - const history: *PageListSearch = if (self.history) |*h| &h.searcher else { + const history: *HistorySearch = if (self.history) |*h| h else { // No history to search, we're done. self.state = .complete; return; @@ -261,7 +245,11 @@ pub const ScreenSearch = struct { // Try to consume all the loaded matches in one go, because // the search is generally fast for loaded data. const alloc = self.allocator(); - while (history.next()) |sel| { + while (history.searcher.next()) |sel| { + // Ignore selections that are found within the starting + // node since those are covered by the active area search. + if (sel.start().node == history.start_pin.node) continue; + // Same note as tickActive for error handling. try self.history_results.append(alloc, sel); } @@ -332,12 +320,6 @@ pub const ScreenSearch = struct { break :history; } - // We had prior history with a valid pin and our current - // starting history node doesn't match our previous. So there is - // a small delta (usually small) that we need to search and update - // our history results. - const old_node = history.start_pin.node; - // Do a forward search from our prior node to this one. We // collect all the results into a new list. 
We ASSUME that // reloadActive is being called frequently enough that there isn't @@ -362,10 +344,13 @@ pub const ScreenSearch = struct { self.history_results.items.len, ); errdefer results.deinit(alloc); - while (window.next()) |sel| try results.append( - alloc, - sel, - ); + while (window.next()) |sel| { + if (sel.start().node == history_node) continue; + try results.append( + alloc, + sel, + ); + } // If we have no matches then there is nothing to change // in our history (fast path) @@ -374,12 +359,7 @@ pub const ScreenSearch = struct { // Matches! Reverse our list then append all the remaining // history items that didn't start on our original node. std.mem.reverse(Selection, results.items); - for (self.history_results.items, 0..) |sel, i| { - if (sel.start().node != old_node) { - try results.appendSlice(alloc, self.history_results.items[i..]); - break; - } - } + try results.appendSlice(alloc, self.history_results.items); self.history_results.deinit(alloc); self.history_results = results; }