fix: use a partial write capable case folding writer in Buffer.find_all_ranges

This fixes case-insensitive search. Previously, case folding would fail on input slices that contain partial UTF-8 sequences, which are normal in the buffer write process design. Now these partial UTF-8 sequences are not consumed; instead they are pushed to the next write call, where they will be completed from the main buffer contents.
refactor: use unchecked Utf8View in utf8 transformation
2025-11-26 09:56:39 +01:00 · 2025-11-26 09:33:04 +01:00 · 2025-11-25 22:48:11 +01:00 · 2025-11-25 22:35:14 +01:00 · 2025-11-25 21:51:23 +01:00 · 2025-11-25 21:50:36 +01:00
5 changed files with 151 additions and 33 deletions
--- a/src/buffer/Buffer.zig
+++ b/src/buffer/Buffer.zig
@ -17,6 +17,8 @@ pub const Cursor = @import("Cursor.zig");
 pub const View = @import("View.zig");
 pub const Selection = @import("Selection.zig");

+pub const FindMode = enum { exact, case_folded };
+
 pub const Metrics = struct {
    ctx: *const anyopaque,
    egc_length: egc_length_func,
@ -943,10 +945,10 @@ const Node = union(enum) {
        }
    }

-    pub const FindMode = enum { exact, case_folded };
    pub const FindAllCallback = fn (data: *anyopaque, begin_row: usize, begin_col: usize, end_row: usize, end_col: usize) error{Stop}!void;
    pub fn find_all_ranges(self: *const Node, pattern: []const u8, data: *anyopaque, callback: *const FindAllCallback, mode: FindMode, allocator: Allocator) error{ OutOfMemory, Stop }!void {
        const Ctx = struct {
+            allocator: std.mem.Allocator,
            pattern: []const u8,
            data: *anyopaque,
            callback: *const FindAllCallback,
@ -985,7 +987,11 @@ const Node = union(enum) {
                            input = input[input_consume_size..];
                        },
                        .case_folded => {
-                            @panic("unimplemented");
+                            const input_consume_size = @min(ctx.buf.len - ctx.rest.len, input.len);
+                            var writer = std.Io.Writer.fixed(ctx.buf[ctx.rest.len..]);
+                            const folded = unicode.case_folded_write_partial(&writer, input[0..input_consume_size]) catch return error.WriteFailed;
+                            ctx.rest = ctx.buf[0 .. ctx.rest.len + folded.len];
+                            input = input[folded.len..];
                        },
                    }

@ -1030,6 +1036,7 @@ const Node = union(enum) {
            }
        };
        var ctx: Ctx = .{
+            .allocator = allocator,
            .pattern = pattern,
            .data = data,
            .callback = callback,
--- a/src/buffer/unicode.zig
+++ b/src/buffer/unicode.zig
@ -89,7 +89,7 @@ fn raw_byte_to_utf8(cp: u8, buf: []u8) ![]const u8 {
    var utf16le: [1]u16 = undefined;
    const utf16le_as_bytes = std.mem.sliceAsBytes(utf16le[0..]);
    std.mem.writeInt(u16, utf16le_as_bytes[0..2], cp, .little);
-    return buf[0..try std.unicode.utf16LeToUtf8(buf, &utf16le)];
+    return buf[0..try utf16LeToUtf8(buf, &utf16le)];
 }

 pub fn utf8_sanitize(allocator: std.mem.Allocator, input: []const u8) error{
@ -106,17 +106,14 @@ pub fn utf8_sanitize(allocator: std.mem.Allocator, input: []const u8) error{
 }

 pub const TransformError = error{
-    InvalidUtf8,
    OutOfMemory,
    Utf8CannotEncodeSurrogateHalf,
    CodepointTooLarge,
    WriteFailed,
 };

-fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
-    var result: std.Io.Writer.Allocating = .init(allocator);
-    defer result.deinit();
-    const view: std.unicode.Utf8View = try .init(text);
+fn utf8_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError!void {
+    const view: Utf8View = .initUnchecked(text);
    var it = view.iterator();
    while (it.nextCodepoint()) |cp| {
        const cp_ = switch (field) {
@ -125,14 +122,36 @@ fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator
            else => @compileError(@tagName(field) ++ " is not a unicode transformation"),
        };
        var utf8_buf: [6]u8 = undefined;
-        const size = try std.unicode.utf8Encode(cp_, &utf8_buf);
-        try result.writer.writeAll(utf8_buf[0..size]);
+        const size = try utf8Encode(cp_, &utf8_buf);
+        try writer.writeAll(utf8_buf[0..size]);
    }
+}
+
+fn utf8_partial_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError![]const u8 {
+    const view: Utf8PartialView = .initUnchecked(text);
+    var it = view.iterator();
+    while (it.nextCodepoint()) |cp| {
+        const cp_ = switch (field) {
+            .simple_uppercase_mapping, .simple_lowercase_mapping => uucode.get(field, cp) orelse cp,
+            .case_folding_simple => uucode.get(field, cp),
+            else => @compileError(@tagName(field) ++ " is not a unicode transformation"),
+        };
+        var utf8_buf: [6]u8 = undefined;
+        const size = try utf8Encode(cp_, &utf8_buf);
+        try writer.writeAll(utf8_buf[0..size]);
+    }
+    return text[0..it.end];
+}
+
+fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
+    var result: std.Io.Writer.Allocating = .init(allocator);
+    defer result.deinit();
+    try utf8_write_transform(field, &result.writer, text);
    return result.toOwnedSlice();
 }

-fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) TransformError!bool {
-    const view: std.unicode.Utf8View = try .init(text);
+fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) bool {
+    const view: Utf8View = .initUnchecked(text);
    var it = view.iterator();
    while (it.nextCodepoint()) |cp| {
        const result = switch (field) {
@ -144,13 +163,7 @@ fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) TransformE
    return true;
 }

-pub fn to_upper(allocator: std.mem.Allocator, text: []const u8) error{
-    InvalidUtf8,
-    OutOfMemory,
-    Utf8CannotEncodeSurrogateHalf,
-    CodepointTooLarge,
-    WriteFailed,
-}![]u8 {
+pub fn to_upper(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    return utf8_transform(.simple_uppercase_mapping, allocator, text);
 }

@ -162,12 +175,68 @@ pub fn case_fold(allocator: std.mem.Allocator, text: []const u8) TransformError!
    return utf8_transform(.case_folding_simple, allocator, text);
 }

+pub fn case_folded_write(writer: *std.Io.Writer, text: []const u8) TransformError!void {
+    return utf8_write_transform(.case_folding_simple, writer, text);
+}
+
+pub fn case_folded_write_partial(writer: *std.Io.Writer, text: []const u8) TransformError![]const u8 {
+    return utf8_partial_write_transform(.case_folding_simple, writer, text);
+}
+
 pub fn switch_case(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
-    return if (try utf8_predicate(.is_lowercase, text))
+    return if (utf8_predicate(.is_lowercase, text))
        to_upper(allocator, text)
    else
        to_lower(allocator, text);
 }

+pub fn is_lowercase(text: []const u8) bool {
+    return utf8_predicate(.is_lowercase, text);
+}
+
 const std = @import("std");
 const uucode = @import("vaxis").uucode;
+
+const utf16LeToUtf8 = std.unicode.utf16LeToUtf8;
+const utf8ByteSequenceLength = std.unicode.utf8ByteSequenceLength;
+const utf8Decode = std.unicode.utf8Decode;
+const utf8Encode = std.unicode.utf8Encode;
+const Utf8View = std.unicode.Utf8View;
+
+const Utf8PartialIterator = struct {
+    bytes: []const u8,
+    end: usize,
+
+    fn nextCodepointSlice(it: *Utf8PartialIterator) ?[]const u8 {
+        if (it.end >= it.bytes.len) {
+            return null;
+        }
+
+        const cp_len = utf8ByteSequenceLength(it.bytes[it.end]) catch unreachable;
+        if (it.end + cp_len > it.bytes.len) {
+            return null;
+        }
+        it.end += cp_len;
+        return it.bytes[it.end - cp_len .. it.end];
+    }
+
+    fn nextCodepoint(it: *Utf8PartialIterator) ?u21 {
+        const slice = it.nextCodepointSlice() orelse return null;
+        return utf8Decode(slice) catch unreachable;
+    }
+};
+
+const Utf8PartialView = struct {
+    bytes: []const u8,
+
+    fn initUnchecked(s: []const u8) Utf8PartialView {
+        return Utf8PartialView{ .bytes = s };
+    }
+
+    fn iterator(s: Utf8PartialView) Utf8PartialIterator {
+        return Utf8PartialIterator{
+            .bytes = s.bytes,
+            .end = 0,
+        };
+    }
+};
--- a/src/keybind/builtin/flow.json
+++ b/src/keybind/builtin/flow.json
@ -558,6 +558,7 @@
            ["ctrl+space", "mini_mode_cancel"],
            ["ctrl+enter", "mini_mode_insert_bytes", "\n"],
            ["ctrl+backspace", "mini_mode_reset"],
+            ["alt+c", "toggle_find_mode"],
            ["alt+v", "system_paste"],
            ["alt+n", "goto_next_match"],
            ["alt+p", "goto_prev_match"],
--- a/src/tui/editor.zig
+++ b/src/tui/editor.zig
@ -5218,12 +5218,16 @@ pub const Editor = struct {
    pub fn find_query(self: *Self, ctx: Context) Result {
        var query: []const u8 = undefined;
        var match_type: Match.Type = undefined;
+        var find_mode: Buffer.FindMode = .exact;
        if (ctx.args.match(.{tp.extract(&query)}) catch false) {
            self.match_type = .find;
-            try self.find_in_buffer(query, .none);
+            try self.find_in_buffer(query, .none, find_mode);
            self.clamp();
        } else if (ctx.args.match(.{ tp.extract(&query), tp.extract(&match_type) }) catch false) {
-            try self.find_in_buffer(query, match_type);
+            try self.find_in_buffer(query, match_type, find_mode);
+            self.clamp();
+        } else if (ctx.args.match(.{ tp.extract(&query), tp.extract(&match_type), tp.extract(&find_mode) }) catch false) {
+            try self.find_in_buffer(query, match_type, find_mode);
            self.clamp();
        } else return error.InvalidFindQueryArgument;
    }
@ -5233,7 +5237,7 @@ pub const Editor = struct {
        _ = ctx;
        const query: []const u8 = try self.copy_word_at_cursor(self.allocator);
        defer self.allocator.free(query);
-        try self.find_in_buffer(query, .find);
+        try self.find_in_buffer(query, .find, .exact);
    }
    pub const find_word_at_cursor_meta: Meta = .{ .description = "Search for the word under the cursor" };

@ -5274,13 +5278,13 @@ pub const Editor = struct {
        } else self.last_find_query = self.allocator.dupe(u8, query) catch return;
    }

-    pub fn find_in_buffer(self: *Self, query: []const u8, match_type: Match.Type) !void {
+    pub fn find_in_buffer(self: *Self, query: []const u8, match_type: Match.Type, find_mode: Buffer.FindMode) !void {
        self.set_last_find_query(query, match_type);
        self.match_type = match_type;
-        return self.find_in_buffer_sync(query);
+        return self.find_in_buffer_sync(query, find_mode);
    }

-    fn find_in_buffer_sync(self: *Self, query: []const u8) !void {
+    fn find_in_buffer_sync(self: *Self, query: []const u8, mode: Buffer.FindMode) !void {
        const Ctx = struct {
            matches: usize = 0,
            self: *Self,
@ -5296,7 +5300,7 @@ pub const Editor = struct {
        defer self.add_match_done();
        var ctx: Ctx = .{ .self = self };
        self.init_matches_update();
-        try root.find_all_ranges(query, &ctx, Ctx.cb, .exact, self.allocator);
+        try root.find_all_ranges(query, &ctx, Ctx.cb, mode, self.allocator);
    }

    fn find_in_buffer_async(self: *Self, query: []const u8) !void {
@ -5511,7 +5515,7 @@ pub const Editor = struct {
        if (self.matches.items.len == 0) {
            if (self.last_find_query) |last| {
                self.find_operation = .goto_next_match;
-                try self.find_in_buffer(last, self.last_find_query_match_type);
+                try self.find_in_buffer(last, self.last_find_query_match_type, .exact);
            }
        }
        try self.move_cursor_next_match(ctx);
@ -5540,7 +5544,7 @@ pub const Editor = struct {
        if (self.matches.items.len == 0) {
            if (self.last_find_query) |last| {
                self.find_operation = .goto_prev_match;
-                try self.find_in_buffer(last, self.last_find_query_match_type);
+                try self.find_in_buffer(last, self.last_find_query_match_type, .exact);
            }
        }
        try self.move_cursor_prev_match(ctx);
--- a/src/tui/mode/mini/find.zig
+++ b/src/tui/mode/mini/find.zig
@ -1,9 +1,11 @@
 const tp = @import("thespian");
+const cbor = @import("cbor");

 const input = @import("input");
 const keybind = @import("keybind");
 const command = @import("command");
 const EventHandler = @import("EventHandler");
+const Buffer = @import("Buffer");

 const tui = @import("../../tui.zig");
 const ed = @import("../../editor.zig");
@ -14,11 +16,17 @@ const ArrayList = @import("std").ArrayList;

 const Self = @This();
 const name = "󱎸 find";
+const name_auto = name;
+const name_exact = name ++ "  ";
+const name_case_folded = name ++ "  ";

 const Commands = command.Collection(cmds);

+const Mode = enum { auto, exact, case_folded };
+
 allocator: Allocator,
 input_: ArrayList(u8),
+find_mode: Mode = .auto,
 last_input: ArrayList(u8),
 start_view: ed.View,
 start_cursor: ed.Cursor,
@ -26,7 +34,7 @@ editor: *ed.Editor,
 history_pos: ?usize = null,
 commands: Commands = undefined,

-pub fn create(allocator: Allocator, _: command.Context) !struct { tui.Mode, tui.MiniMode } {
+pub fn create(allocator: Allocator, ctx: command.Context) !struct { tui.Mode, tui.MiniMode } {
    const editor = tui.get_active_editor() orelse return error.NotFound;
    const self = try allocator.create(Self);
    errdefer allocator.destroy(self);
@ -39,7 +47,11 @@ pub fn create(allocator: Allocator, _: command.Context) !struct { tui.Mode, tui.
        .editor = editor,
    };
    try self.commands.init(self);
-    if (editor.get_primary().selection) |sel| ret: {
+    _ = ctx.args.match(.{cbor.extract(&self.find_mode)}) catch {};
+    var query: []const u8 = undefined;
+    if (ctx.args.match(.{ cbor.extract(&self.find_mode), cbor.extract(&query) }) catch false) {
+        try self.input_.appendSlice(self.allocator, query);
+    } else if (editor.get_primary().selection) |sel| ret: {
        const text = editor.get_selection(sel, self.allocator) catch break :ret;
        defer self.allocator.free(text);
        try self.input_.appendSlice(self.allocator, text);
@ -48,7 +60,11 @@ pub fn create(allocator: Allocator, _: command.Context) !struct { tui.Mode, tui.
        .insert_command = "mini_mode_insert_bytes",
    });
    mode.event_handler = EventHandler.to_owned(self);
-    return .{ mode, .{ .name = name } };
+    return .{ mode, .{ .name = switch (self.find_mode) {
+        .auto => name_auto,
+        .exact => name_exact,
+        .case_folded => name_case_folded,
+    } } };
 }

 pub fn deinit(self: *Self) void {
@ -91,13 +107,21 @@ fn flush_input(self: *Self) !void {
        const primary = self.editor.get_primary();
        primary.selection = null;
        primary.cursor = self.start_cursor;
-        try self.editor.find_in_buffer(self.input_.items, .find);
+        try self.editor.find_in_buffer(self.input_.items, .find, switch (self.find_mode) {
+            .auto => self.auto_detect_mode(),
+            .exact => .exact,
+            .case_folded => .case_folded,
+        });
    } else {
        self.editor.get_primary().selection = null;
        self.editor.init_matches_update();
    }
 }

+fn auto_detect_mode(self: *Self) Buffer.FindMode {
+    return if (Buffer.unicode.is_lowercase(self.input_.items)) .case_folded else .exact;
+}
+
 fn cmd(self: *Self, name_: []const u8, ctx: command.Context) tp.result {
    self.flush_input() catch {};
    return command.executeName(name_, ctx);
@ -153,6 +177,19 @@ const cmds = struct {
    const Meta = command.Metadata;
    const Result = command.Result;

+    pub fn toggle_find_mode(self: *Self, _: Ctx) Result {
+        const new_find_mode: Buffer.FindMode = switch (self.find_mode) {
+            .exact => .case_folded,
+            .auto, .case_folded => .exact,
+        };
+        const allocator = self.allocator;
+        const query = try allocator.dupe(u8, self.input_.items);
+        defer allocator.free(query);
+        self.cancel();
+        command.executeName("find", command.fmt(.{ new_find_mode, query })) catch {};
+    }
+    pub const toggle_find_mode_meta: Meta = .{ .description = "Toggle find mode" };
+
    pub fn mini_mode_reset(self: *Self, _: Ctx) Result {
        self.input_.clearRetainingCapacity();
        self.update_mini_mode_text();
Author	SHA1	Message	Date
CJ van den Berg	99f9f95dbc	fix: use a partial write capable case folding writer in Buffer.find_all_ranges This fixes case-insensitive search. Previously, case folding would fail on input slices that contain partial UTF-8 sequences, which are normal in the buffer write process design. Now these partial UTF-8 sequences are not consumed; instead they are pushed to the next write call, where they will be completed from the main buffer contents.	2025-11-26 09:56:39 +01:00
CJ van den Berg	68b17301cd	refactor: use unchecked Utf8View in utf8 transformation Internally we use only validated UTF-8, and the unchecked view performs much better and reduces unused error values.	2025-11-26 09:33:04 +01:00
CJ van den Berg	2520a37a90	feat: add case auto and case insensitive mode to find	2025-11-25 22:48:11 +01:00
CJ van den Berg	abee93d366	refactor: add unicode.is_lowercase	2025-11-25 22:35:14 +01:00
CJ van den Berg	047409cd29	feat: add find_mode parameter to Editor.find_query	2025-11-25 21:51:23 +01:00
CJ van den Berg	64d95ee009	refactor: move FindMode to Buffer	2025-11-25 21:50:36 +01:00
CJ van den Berg	3ebe68a384	refactor: implement find_all_ranges mode .case_folded	2025-11-25 21:43:12 +01:00
CJ van den Berg	37428bd698	refactor: add unicode.case_folded_write	2025-11-25 21:42:51 +01:00