terminal: PageListSearch updated to split next and feed

pull/9585/head
Mitchell Hashimoto 2025-11-13 09:25:27 -08:00
parent 22496b8f0e
commit 2b647ba4cb
No known key found for this signature in database
GPG Key ID: 523D5DC389D273BC
2 changed files with 376 additions and 49 deletions

View File

@ -1,29 +1,7 @@
//! Search functionality for the terminal.
//!
//! At the time of writing this comment, this is a **work in progress**.
//!
//! Search at the time of writing is implemented using a simple
//! boyer-moore-horspool algorithm. The suboptimal part of the implementation
//! is that we need to encode each terminal page into a text buffer in order
//! to apply BMH to it. This is because the terminal page is not laid out
//! in a flat text form.
//!
//! To minimize memory usage, we use a sliding window to search for the
//! needle. The sliding window only keeps the minimum amount of page data
//! in memory to search for a needle (i.e. `needle.len - 1` bytes of overlap
//! between terminal pages).
//!
//! Future work:
//!
//! - PageListSearch on a PageList concurrently with another thread
//! - Handle pruned pages in a PageList to ensure we don't keep references
//! - Repeat search a changing active area of the screen
//! - Reverse search so that more recent matches are found first
//!
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const testing = std.testing;
const CircBuf = @import("../../datastruct/main.zig").CircBuf;
const terminal = @import("../main.zig");
const point = terminal.point;
@ -33,13 +11,24 @@ const Pin = PageList.Pin;
const Selection = terminal.Selection;
const Screen = terminal.Screen;
const PageFormatter = @import("../formatter.zig").PageFormatter;
const Terminal = @import("../Terminal.zig");
const SlidingWindow = @import("sliding_window.zig").SlidingWindow;
/// Searches for a term in a PageList structure.
///
/// At the time of writing, this does not support searching a pagelist
/// simultaneously as its being used by another thread. This will be resolved
/// in the future.
/// This searches in reverse order starting from the given node.
///
/// This assumes that nodes do not change contents. For nodes that change
/// contents, look at ActiveSearch, which is designed to re-search the active
/// area since it assumed to change. When integrating ActiveSearch with
/// PageListSearch, the caller should start the PageListSearch from the
/// returned node from ActiveSearch.update().
///
/// Concurrent access to a PageList or nodes in a PageList are not allowed,
/// so the caller should ensure that necessary locks are held. Each function
/// documents whether it accesses the PageList or not. For example, you can
/// safely call `next()` without holding a lock, but you must hold a lock
/// while calling `feed()`.
pub const PageListSearch = struct {
/// The list we're searching.
list: *PageList,
@ -47,50 +36,355 @@ pub const PageListSearch = struct {
/// The sliding window of page contents and nodes to search.
window: SlidingWindow,
/// The tracked pin for our current position in the pagelist. This
/// will always point to the CURRENT node we're searching from so that
/// we can track if we move.
pin: *Pin,
/// Initialize the page list search. The needle is copied so it can
/// be freed immediately.
///
/// Accesses the PageList/Node so the caller must ensure it is safe
/// to do so if there is any concurrent access.
pub fn init(
alloc: Allocator,
list: *PageList,
needle: []const u8,
list: *PageList,
start: *PageList.List.Node,
) Allocator.Error!PageListSearch {
var window: SlidingWindow = try .init(alloc, .forward, needle);
// We put a tracked pin into the node that we're starting from.
// By using a tracked pin, we can keep our pagelist references safe
// because if the pagelist prunes pages, the tracked pin will
// be moved somewhere safe.
const pin = try list.trackPin(.{
.node = start,
.y = start.data.size.rows - 1,
.x = start.data.size.cols - 1,
});
errdefer list.untrackPin(pin);
// Create our sliding window we'll use for searching.
var window: SlidingWindow = try .init(alloc, .reverse, needle);
errdefer window.deinit();
// We always feed our initial page data into the window, because
// we have the lock anyways and this lets our `pin` point to our
// current node and feed to work properly.
_ = try window.append(start);
return .{
.list = list,
.window = window,
.pin = pin,
};
}
/// Modifies the PageList (to untrack a pin) so the caller must ensure
/// that it is safe to do so.
pub fn deinit(self: *PageListSearch) void {
self.window.deinit();
self.list.untrackPin(self.pin);
}
/// Find the next match for the needle in the pagelist. This returns
/// null when there are no more matches.
pub fn next(self: *PageListSearch) Allocator.Error!?Selection {
// Try to search for the needle in the window. If we find a match
// then we can return that and we're done.
if (self.window.next()) |sel| return sel;
/// Return the next match in the loaded page nodes. If this returns
/// null then the PageList search needs to be fed the next node(s).
/// Call, `feed` to do this.
///
/// Beware that the selection returned may point to a node that
/// is freed if the caller does not hold necessary locks on the
/// PageList while searching. The pins should be validated prior to
/// final use.
///
/// This does NOT access the PageList, so it can be called without
/// a lock held.
pub fn next(self: *PageListSearch) ?Selection {
return self.window.next();
}
// Get our next node. If we have a value in our window then we
// can determine the next node. If we don't, we've never setup the
// window so we use our first node.
var node_: ?*PageList.List.Node = if (self.window.meta.last()) |meta|
meta.node.next
else
self.list.pages.first;
/// Feed more data to the sliding window from the pagelist. This will
/// feed enough data to cover at least one match (needle length) if it
/// exists; this doesn't perform the search, it only feeds data.
///
/// This accesses nodes in the PageList, so the caller must ensure
/// it is safe to do so (i.e. hold necessary locks).
///
/// This returns false if there is no more data to feed. This essentially
/// means we've searched the entire pagelist.
pub fn feed(self: *PageListSearch) Allocator.Error!bool {
// Add at least enough data to find a single match.
var rem = self.window.needle.len;
// Add one pagelist node at a time, look for matches, and repeat
// until we find a match or we reach the end of the pagelist.
// This append then next pattern limits memory usage of the window.
while (node_) |node| : (node_ = node.next) {
try self.window.append(node);
if (self.window.next()) |sel| return sel;
// Start at our previous node and then continue adding until we
// get our desired amount of data.
var node_: ?*PageList.List.Node = self.pin.node.prev;
while (node_) |node| : (node_ = node.prev) {
rem -|= try self.window.append(node);
// Move our tracked pin to the new node.
self.pin.node = node;
if (rem == 0) break;
}
// We've reached the end of the pagelist, no matches.
return null;
// True if we fed any data.
return rem < self.window.needle.len;
}
};
test "simple search" {
const alloc = testing.allocator;
var t: Terminal = try .init(alloc, .{ .cols = 10, .rows = 10 });
defer t.deinit(alloc);
var s = t.vtStream();
defer s.deinit();
try s.nextSlice("Fizz\r\nBuzz\r\nFizz\r\nBang");
var search: PageListSearch = try .init(
alloc,
"Fizz",
&t.screen.pages,
t.screen.pages.pages.last.?,
);
defer search.deinit();
{
const sel = search.next().?;
try testing.expectEqual(point.Point{ .active = .{
.x = 0,
.y = 2,
} }, t.screen.pages.pointFromPin(.active, sel.start()).?);
try testing.expectEqual(point.Point{ .active = .{
.x = 3,
.y = 2,
} }, t.screen.pages.pointFromPin(.active, sel.end()).?);
}
{
const sel = search.next().?;
try testing.expectEqual(point.Point{ .active = .{
.x = 0,
.y = 0,
} }, t.screen.pages.pointFromPin(.active, sel.start()).?);
try testing.expectEqual(point.Point{ .active = .{
.x = 3,
.y = 0,
} }, t.screen.pages.pointFromPin(.active, sel.end()).?);
}
try testing.expect(search.next() == null);
// We should not be able to feed since we have one page
try testing.expect(!try search.feed());
}
test "feed multiple pages with matches" {
const alloc = testing.allocator;
var t: Terminal = try .init(alloc, .{ .cols = 10, .rows = 10 });
defer t.deinit(alloc);
var s = t.vtStream();
defer s.deinit();
// Fill up first page
const first_page_rows = t.screen.pages.pages.first.?.data.capacity.rows;
for (0..first_page_rows - 1) |_| try s.nextSlice("\r\n");
try s.nextSlice("Fizz");
try testing.expect(t.screen.pages.pages.first == t.screen.pages.pages.last);
// Create second page
try s.nextSlice("\r\n");
try testing.expect(t.screen.pages.pages.first != t.screen.pages.pages.last);
try s.nextSlice("Buzz\r\nFizz");
var search: PageListSearch = try .init(
alloc,
"Fizz",
&t.screen.pages,
t.screen.pages.pages.last.?,
);
defer search.deinit();
// First match on the last page
const sel1 = search.next();
try testing.expect(sel1 != null);
try testing.expect(search.next() == null);
// Feed should succeed and load the first page
try testing.expect(try search.feed());
// Now we should find the match on the first page
const sel2 = search.next();
try testing.expect(sel2 != null);
try testing.expect(search.next() == null);
// No more pages to feed
try testing.expect(!try search.feed());
}
test "feed multiple pages no matches" {
const alloc = testing.allocator;
var t: Terminal = try .init(alloc, .{ .cols = 10, .rows = 10 });
defer t.deinit(alloc);
var s = t.vtStream();
defer s.deinit();
// Fill up first page
const first_page_rows = t.screen.pages.pages.first.?.data.capacity.rows;
for (0..first_page_rows - 1) |_| try s.nextSlice("\r\n");
try s.nextSlice("Hello");
// Create second page
try s.nextSlice("\r\n");
try testing.expect(t.screen.pages.pages.first != t.screen.pages.pages.last);
try s.nextSlice("World");
var search: PageListSearch = try .init(
alloc,
"Nope",
&t.screen.pages,
t.screen.pages.pages.last.?,
);
defer search.deinit();
// No matches on last page
try testing.expect(search.next() == null);
// Feed first page
try testing.expect(try search.feed());
// Still no matches
try testing.expect(search.next() == null);
// No more pages
try testing.expect(!try search.feed());
}
test "feed iteratively through multiple matches" {
const alloc = testing.allocator;
var t: Terminal = try .init(alloc, .{ .cols = 80, .rows = 24 });
defer t.deinit(alloc);
var s = t.vtStream();
defer s.deinit();
const first_page_rows = t.screen.pages.pages.first.?.data.capacity.rows;
// Fill first page with a match at the end
for (0..first_page_rows - 1) |_| try s.nextSlice("\r\n");
try s.nextSlice("Page1Test");
try testing.expect(t.screen.pages.pages.first == t.screen.pages.pages.last);
// Create second page with a match
try s.nextSlice("\r\n");
try testing.expect(t.screen.pages.pages.first != t.screen.pages.pages.last);
try s.nextSlice("Page2Test");
var search: PageListSearch = try .init(
alloc,
"Test",
&t.screen.pages,
t.screen.pages.pages.last.?,
);
defer search.deinit();
// Match on page 2
try testing.expect(search.next() != null);
try testing.expect(search.next() == null);
// Feed page 1
try testing.expect(try search.feed());
try testing.expect(search.next() != null);
try testing.expect(search.next() == null);
// No more pages
try testing.expect(!try search.feed());
}
test "feed with match spanning page boundary" {
const alloc = testing.allocator;
var t: Terminal = try .init(alloc, .{ .cols = 80, .rows = 24 });
defer t.deinit(alloc);
var s = t.vtStream();
defer s.deinit();
const first_page_rows = t.screen.pages.pages.first.?.data.capacity.rows;
// Fill first page ending with "Te"
for (0..first_page_rows - 1) |_| try s.nextSlice("\r\n");
for (0..t.screen.pages.cols - 2) |_| try s.nextSlice("x");
try s.nextSlice("Te");
try testing.expect(t.screen.pages.pages.first == t.screen.pages.pages.last);
// Second page starts with "st"
try s.nextSlice("st");
try testing.expect(t.screen.pages.pages.first != t.screen.pages.pages.last);
var search: PageListSearch = try .init(
alloc,
"Test",
&t.screen.pages,
t.screen.pages.pages.last.?,
);
defer search.deinit();
// No complete match on last page alone (only has "st")
try testing.expect(search.next() == null);
// Feed first page - this should give us enough data to find "Test"
try testing.expect(try search.feed());
// Should find the spanning match
const sel = search.next().?;
try testing.expect(sel.start().node != sel.end().node);
{
const str = try t.screen.selectionString(
alloc,
.{ .sel = sel },
);
defer alloc.free(str);
try testing.expectEqualStrings(str, "Test");
}
// No more matches
try testing.expect(search.next() == null);
// No more pages
try testing.expect(!try search.feed());
}
test "feed with match spanning page boundary with newline" {
const alloc = testing.allocator;
var t: Terminal = try .init(alloc, .{ .cols = 80, .rows = 24 });
defer t.deinit(alloc);
var s = t.vtStream();
defer s.deinit();
const first_page_rows = t.screen.pages.pages.first.?.data.capacity.rows;
// Fill first page ending with "Te"
for (0..first_page_rows - 1) |_| try s.nextSlice("\r\n");
for (0..t.screen.pages.cols - 2) |_| try s.nextSlice("x");
try s.nextSlice("Te");
try testing.expect(t.screen.pages.pages.first == t.screen.pages.pages.last);
// Second page starts with "st"
try s.nextSlice("\r\n");
try testing.expect(t.screen.pages.pages.first != t.screen.pages.pages.last);
try s.nextSlice("st");
var search: PageListSearch = try .init(
alloc,
"Test",
&t.screen.pages,
t.screen.pages.pages.last.?,
);
defer search.deinit();
// Should not find any matches since we broke with an explicit newline.
try testing.expect(search.next() == null);
try testing.expect(try search.feed());
try testing.expect(search.next() == null);
try testing.expect(!try search.feed());
}

View File

@ -0,0 +1,33 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const Screen = @import("../Screen.zig");
const Active = @import("active.zig").ActiveSearch;
pub const ScreenSearch = struct {
/// The active area search state
active: Active,
/// Search state machine
const State = enum {
/// Currently searching the active area
active,
};
pub fn init(
alloc: Allocator,
screen: *const Screen,
needle: []const u8,
) Allocator.Error!ScreenSearch {
_ = screen;
// Setup our active area search
var active: Active = try .init(alloc, needle);
errdefer active.deinit();
// Store our screen
return .{
.active = active,
};
}
};