Compare commits

...

8 commits

Author SHA1 Message Date
99f9f95dbc
fix: use a partial write capable case folding writer in Buffer.find_all_ranges
This fixes case insensitive search. Previously the case folding would fail on
input slices that contain partial utf8 sequences, which is normal in the
buffer write process design. Now these partial utf8 sequences are not consumed
and instead pushed to the next write call where they will be completed from the
main buffer contents.
2025-11-26 09:56:39 +01:00
68b17301cd
refactor: use unchecked Utf8View in utf8 transformation
Internally we use only validated utf8 and unchecked performs much better
and reduces unused error values.
2025-11-26 09:33:04 +01:00
2520a37a90
feat: add case auto and case insensitive mode to find 2025-11-25 22:48:11 +01:00
abee93d366
refactor: add unicode.is_lowercase 2025-11-25 22:35:14 +01:00
047409cd29
feat: add find_mode parameter to Editor.find_query 2025-11-25 21:51:23 +01:00
64d95ee009
refactor: move FindMode to Buffer 2025-11-25 21:50:36 +01:00
3ebe68a384
refactor: implement find_all_ranges mode .case_folded 2025-11-25 21:43:12 +01:00
37428bd698
refactor: add unicode.case_folded_write 2025-11-25 21:42:51 +01:00
5 changed files with 151 additions and 33 deletions

View file

@ -17,6 +17,8 @@ pub const Cursor = @import("Cursor.zig");
pub const View = @import("View.zig");
pub const Selection = @import("Selection.zig");
pub const FindMode = enum { exact, case_folded };
pub const Metrics = struct {
ctx: *const anyopaque,
egc_length: egc_length_func,
@ -943,10 +945,10 @@ const Node = union(enum) {
}
}
pub const FindMode = enum { exact, case_folded };
pub const FindAllCallback = fn (data: *anyopaque, begin_row: usize, begin_col: usize, end_row: usize, end_col: usize) error{Stop}!void;
pub fn find_all_ranges(self: *const Node, pattern: []const u8, data: *anyopaque, callback: *const FindAllCallback, mode: FindMode, allocator: Allocator) error{ OutOfMemory, Stop }!void {
const Ctx = struct {
allocator: std.mem.Allocator,
pattern: []const u8,
data: *anyopaque,
callback: *const FindAllCallback,
@ -985,7 +987,11 @@ const Node = union(enum) {
input = input[input_consume_size..];
},
.case_folded => {
@panic("unimplemented");
const input_consume_size = @min(ctx.buf.len - ctx.rest.len, input.len);
var writer = std.Io.Writer.fixed(ctx.buf[ctx.rest.len..]);
const folded = unicode.case_folded_write_partial(&writer, input[0..input_consume_size]) catch return error.WriteFailed;
ctx.rest = ctx.buf[0 .. ctx.rest.len + folded.len];
input = input[folded.len..];
},
}
@ -1030,6 +1036,7 @@ const Node = union(enum) {
}
};
var ctx: Ctx = .{
.allocator = allocator,
.pattern = pattern,
.data = data,
.callback = callback,

View file

@ -89,7 +89,7 @@ fn raw_byte_to_utf8(cp: u8, buf: []u8) ![]const u8 {
var utf16le: [1]u16 = undefined;
const utf16le_as_bytes = std.mem.sliceAsBytes(utf16le[0..]);
std.mem.writeInt(u16, utf16le_as_bytes[0..2], cp, .little);
return buf[0..try std.unicode.utf16LeToUtf8(buf, &utf16le)];
return buf[0..try utf16LeToUtf8(buf, &utf16le)];
}
pub fn utf8_sanitize(allocator: std.mem.Allocator, input: []const u8) error{
@ -106,17 +106,14 @@ pub fn utf8_sanitize(allocator: std.mem.Allocator, input: []const u8) error{
}
pub const TransformError = error{
InvalidUtf8,
OutOfMemory,
Utf8CannotEncodeSurrogateHalf,
CodepointTooLarge,
WriteFailed,
};
fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
var result: std.Io.Writer.Allocating = .init(allocator);
defer result.deinit();
const view: std.unicode.Utf8View = try .init(text);
fn utf8_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError!void {
const view: Utf8View = .initUnchecked(text);
var it = view.iterator();
while (it.nextCodepoint()) |cp| {
const cp_ = switch (field) {
@ -125,14 +122,36 @@ fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator
else => @compileError(@tagName(field) ++ " is not a unicode transformation"),
};
var utf8_buf: [6]u8 = undefined;
const size = try std.unicode.utf8Encode(cp_, &utf8_buf);
try result.writer.writeAll(utf8_buf[0..size]);
const size = try utf8Encode(cp_, &utf8_buf);
try writer.writeAll(utf8_buf[0..size]);
}
}
/// Applies the unicode transformation selected by `field` to every complete
/// code point in `text` and writes the UTF-8 encoded result to `writer`.
///
/// `text` is not validated. Iteration stops before a trailing UTF-8 sequence
/// that is cut off by the end of `text`, so such bytes are neither transformed
/// nor written.
///
/// Returns the prefix of `text` that was consumed. Note that this is a slice of
/// the *input*, not of the written output.
fn utf8_partial_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError![]const u8 {
    var iter = Utf8PartialView.initUnchecked(text).iterator();
    while (iter.nextCodepoint()) |codepoint| {
        const mapped = switch (field) {
            // Code points without an upper/lower mapping are passed through unchanged.
            .simple_uppercase_mapping, .simple_lowercase_mapping => uucode.get(field, codepoint) orelse codepoint,
            .case_folding_simple => uucode.get(field, codepoint),
            else => @compileError(@tagName(field) ++ " is not a unicode transformation"),
        };
        var encoded: [6]u8 = undefined;
        const encoded_len = try utf8Encode(mapped, &encoded);
        try writer.writeAll(encoded[0..encoded_len]);
    }
    // `iter.end` is the offset just past the last complete code point.
    return text[0..iter.end];
}
/// Applies the unicode transformation selected by `field` to `text` and
/// returns the transformed bytes as a slice allocated with `allocator`.
/// Caller owns the returned slice.
fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    var out = std.Io.Writer.Allocating.init(allocator);
    defer out.deinit();
    try utf8_write_transform(field, &out.writer, text);
    const owned = try out.toOwnedSlice();
    return owned;
}
fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) TransformError!bool {
const view: std.unicode.Utf8View = try .init(text);
fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) bool {
const view: Utf8View = .initUnchecked(text);
var it = view.iterator();
while (it.nextCodepoint()) |cp| {
const result = switch (field) {
@ -144,13 +163,7 @@ fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) TransformE
return true;
}
pub fn to_upper(allocator: std.mem.Allocator, text: []const u8) error{
InvalidUtf8,
OutOfMemory,
Utf8CannotEncodeSurrogateHalf,
CodepointTooLarge,
WriteFailed,
}![]u8 {
pub fn to_upper(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
return utf8_transform(.simple_uppercase_mapping, allocator, text);
}
@ -162,12 +175,68 @@ pub fn case_fold(allocator: std.mem.Allocator, text: []const u8) TransformError!
return utf8_transform(.case_folding_simple, allocator, text);
}
/// Writes the simple case folding (`.case_folding_simple`) of `text` to
/// `writer`. `text` is iterated through an unchecked UTF-8 view, so it is not
/// validated here.
pub fn case_folded_write(writer: *std.Io.Writer, text: []const u8) TransformError!void {
    try utf8_write_transform(.case_folding_simple, writer, text);
}
/// Writes the simple case folding (`.case_folding_simple`) of the complete
/// code points in `text` to `writer`, tolerating a truncated UTF-8 sequence at
/// the end of `text`.
///
/// Returns the prefix of `text` that was consumed (a slice of the input, not of
/// the folded output).
pub fn case_folded_write_partial(writer: *std.Io.Writer, text: []const u8) TransformError![]const u8 {
    const consumed = try utf8_partial_write_transform(.case_folding_simple, writer, text);
    return consumed;
}
pub fn switch_case(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
return if (try utf8_predicate(.is_lowercase, text))
return if (utf8_predicate(.is_lowercase, text))
to_upper(allocator, text)
else
to_lower(allocator, text);
}
/// Evaluates the `.is_lowercase` predicate over `text` via `utf8_predicate`.
/// NOTE(review): presumably true only when every code point satisfies the
/// predicate — confirm against `utf8_predicate`.
pub fn is_lowercase(text: []const u8) bool {
    const all_lowercase = utf8_predicate(.is_lowercase, text);
    return all_lowercase;
}
const std = @import("std");
const uucode = @import("vaxis").uucode;
const utf16LeToUtf8 = std.unicode.utf16LeToUtf8;
const utf8ByteSequenceLength = std.unicode.utf8ByteSequenceLength;
const utf8Decode = std.unicode.utf8Decode;
const utf8Encode = std.unicode.utf8Encode;
const Utf8View = std.unicode.Utf8View;
/// Code point iterator over a byte slice that may end in the middle of a UTF-8
/// sequence. The bytes are not validated: an invalid lead byte or an invalid
/// complete sequence hits `unreachable`.
const Utf8PartialIterator = struct {
    /// The bytes being iterated.
    bytes: []const u8,
    /// Offset just past the last complete code point returned so far.
    end: usize,

    /// Returns the bytes of the next complete code point and advances `end`
    /// past them. Returns null, leaving `end` untouched, when the input is
    /// exhausted or the next sequence is cut off by the end of `bytes`.
    fn nextCodepointSlice(it: *Utf8PartialIterator) ?[]const u8 {
        const start = it.end;
        if (start >= it.bytes.len) return null;
        const seq_len = utf8ByteSequenceLength(it.bytes[start]) catch unreachable;
        const stop = start + seq_len;
        // The sequence runs past the available bytes: stop before it.
        if (stop > it.bytes.len) return null;
        it.end = stop;
        return it.bytes[start..stop];
    }

    /// Decodes and returns the next complete code point, or null under the
    /// same conditions as `nextCodepointSlice`.
    fn nextCodepoint(it: *Utf8PartialIterator) ?u21 {
        return if (it.nextCodepointSlice()) |seq| (utf8Decode(seq) catch unreachable) else null;
    }
};
/// Thin wrapper around a byte slice that hands out `Utf8PartialIterator`s.
/// Counterpart of `std.unicode.Utf8View` for input that may end in a
/// truncated UTF-8 sequence.
const Utf8PartialView = struct {
    /// The wrapped bytes; stored as given, without validation.
    bytes: []const u8,

    /// Wraps `s` without performing any validation.
    fn initUnchecked(s: []const u8) Utf8PartialView {
        return .{ .bytes = s };
    }

    /// Returns an iterator positioned at the start of the wrapped bytes.
    fn iterator(s: Utf8PartialView) Utf8PartialIterator {
        return .{ .bytes = s.bytes, .end = 0 };
    }
};

View file

@ -558,6 +558,7 @@
["ctrl+space", "mini_mode_cancel"],
["ctrl+enter", "mini_mode_insert_bytes", "\n"],
["ctrl+backspace", "mini_mode_reset"],
["alt+c", "toggle_find_mode"],
["alt+v", "system_paste"],
["alt+n", "goto_next_match"],
["alt+p", "goto_prev_match"],

View file

@ -5218,12 +5218,16 @@ pub const Editor = struct {
pub fn find_query(self: *Self, ctx: Context) Result {
var query: []const u8 = undefined;
var match_type: Match.Type = undefined;
var find_mode: Buffer.FindMode = .exact;
if (ctx.args.match(.{tp.extract(&query)}) catch false) {
self.match_type = .find;
try self.find_in_buffer(query, .none);
try self.find_in_buffer(query, .none, find_mode);
self.clamp();
} else if (ctx.args.match(.{ tp.extract(&query), tp.extract(&match_type) }) catch false) {
try self.find_in_buffer(query, match_type);
try self.find_in_buffer(query, match_type, find_mode);
self.clamp();
} else if (ctx.args.match(.{ tp.extract(&query), tp.extract(&match_type), tp.extract(&find_mode) }) catch false) {
try self.find_in_buffer(query, match_type, find_mode);
self.clamp();
} else return error.InvalidFindQueryArgument;
}
@ -5233,7 +5237,7 @@ pub const Editor = struct {
_ = ctx;
const query: []const u8 = try self.copy_word_at_cursor(self.allocator);
defer self.allocator.free(query);
try self.find_in_buffer(query, .find);
try self.find_in_buffer(query, .find, .exact);
}
pub const find_word_at_cursor_meta: Meta = .{ .description = "Search for the word under the cursor" };
@ -5274,13 +5278,13 @@ pub const Editor = struct {
} else self.last_find_query = self.allocator.dupe(u8, query) catch return;
}
pub fn find_in_buffer(self: *Self, query: []const u8, match_type: Match.Type) !void {
pub fn find_in_buffer(self: *Self, query: []const u8, match_type: Match.Type, find_mode: Buffer.FindMode) !void {
self.set_last_find_query(query, match_type);
self.match_type = match_type;
return self.find_in_buffer_sync(query);
return self.find_in_buffer_sync(query, find_mode);
}
fn find_in_buffer_sync(self: *Self, query: []const u8) !void {
fn find_in_buffer_sync(self: *Self, query: []const u8, mode: Buffer.FindMode) !void {
const Ctx = struct {
matches: usize = 0,
self: *Self,
@ -5296,7 +5300,7 @@ pub const Editor = struct {
defer self.add_match_done();
var ctx: Ctx = .{ .self = self };
self.init_matches_update();
try root.find_all_ranges(query, &ctx, Ctx.cb, .exact, self.allocator);
try root.find_all_ranges(query, &ctx, Ctx.cb, mode, self.allocator);
}
fn find_in_buffer_async(self: *Self, query: []const u8) !void {
@ -5511,7 +5515,7 @@ pub const Editor = struct {
if (self.matches.items.len == 0) {
if (self.last_find_query) |last| {
self.find_operation = .goto_next_match;
try self.find_in_buffer(last, self.last_find_query_match_type);
try self.find_in_buffer(last, self.last_find_query_match_type, .exact);
}
}
try self.move_cursor_next_match(ctx);
@ -5540,7 +5544,7 @@ pub const Editor = struct {
if (self.matches.items.len == 0) {
if (self.last_find_query) |last| {
self.find_operation = .goto_prev_match;
try self.find_in_buffer(last, self.last_find_query_match_type);
try self.find_in_buffer(last, self.last_find_query_match_type, .exact);
}
}
try self.move_cursor_prev_match(ctx);

View file

@ -1,9 +1,11 @@
const tp = @import("thespian");
const cbor = @import("cbor");
const input = @import("input");
const keybind = @import("keybind");
const command = @import("command");
const EventHandler = @import("EventHandler");
const Buffer = @import("Buffer");
const tui = @import("../../tui.zig");
const ed = @import("../../editor.zig");
@ -14,11 +16,17 @@ const ArrayList = @import("std").ArrayList;
const Self = @This();
const name = "󱎸 find";
const name_auto = name;
const name_exact = name ++ "";
const name_case_folded = name ++ "";
const Commands = command.Collection(cmds);
const Mode = enum { auto, exact, case_folded };
allocator: Allocator,
input_: ArrayList(u8),
find_mode: Mode = .auto,
last_input: ArrayList(u8),
start_view: ed.View,
start_cursor: ed.Cursor,
@ -26,7 +34,7 @@ editor: *ed.Editor,
history_pos: ?usize = null,
commands: Commands = undefined,
pub fn create(allocator: Allocator, _: command.Context) !struct { tui.Mode, tui.MiniMode } {
pub fn create(allocator: Allocator, ctx: command.Context) !struct { tui.Mode, tui.MiniMode } {
const editor = tui.get_active_editor() orelse return error.NotFound;
const self = try allocator.create(Self);
errdefer allocator.destroy(self);
@ -39,7 +47,11 @@ pub fn create(allocator: Allocator, _: command.Context) !struct { tui.Mode, tui.
.editor = editor,
};
try self.commands.init(self);
if (editor.get_primary().selection) |sel| ret: {
_ = ctx.args.match(.{cbor.extract(&self.find_mode)}) catch {};
var query: []const u8 = undefined;
if (ctx.args.match(.{ cbor.extract(&self.find_mode), cbor.extract(&query) }) catch false) {
try self.input_.appendSlice(self.allocator, query);
} else if (editor.get_primary().selection) |sel| ret: {
const text = editor.get_selection(sel, self.allocator) catch break :ret;
defer self.allocator.free(text);
try self.input_.appendSlice(self.allocator, text);
@ -48,7 +60,11 @@ pub fn create(allocator: Allocator, _: command.Context) !struct { tui.Mode, tui.
.insert_command = "mini_mode_insert_bytes",
});
mode.event_handler = EventHandler.to_owned(self);
return .{ mode, .{ .name = name } };
return .{ mode, .{ .name = switch (self.find_mode) {
.auto => name_auto,
.exact => name_exact,
.case_folded => name_case_folded,
} } };
}
pub fn deinit(self: *Self) void {
@ -91,13 +107,21 @@ fn flush_input(self: *Self) !void {
const primary = self.editor.get_primary();
primary.selection = null;
primary.cursor = self.start_cursor;
try self.editor.find_in_buffer(self.input_.items, .find);
try self.editor.find_in_buffer(self.input_.items, .find, switch (self.find_mode) {
.auto => self.auto_detect_mode(),
.exact => .exact,
.case_folded => .case_folded,
});
} else {
self.editor.get_primary().selection = null;
self.editor.init_matches_update();
}
}
/// Picks the find mode for the current input: `.case_folded` when
/// `Buffer.unicode.is_lowercase` reports the query as lowercase, otherwise
/// `.exact`.
fn auto_detect_mode(self: *Self) Buffer.FindMode {
    const query = self.input_.items;
    if (Buffer.unicode.is_lowercase(query)) return .case_folded;
    return .exact;
}
fn cmd(self: *Self, name_: []const u8, ctx: command.Context) tp.result {
self.flush_input() catch {};
return command.executeName(name_, ctx);
@ -153,6 +177,19 @@ const cmds = struct {
const Meta = command.Metadata;
const Result = command.Result;
/// Switches between exact and case folded search by cancelling this mini mode
/// and re-running the "find" command with the new mode and the current query.
/// From `.auto` the toggle goes to `.exact`.
pub fn toggle_find_mode(self: *Self, _: Ctx) Result {
    // Copy everything needed from `self` before calling `cancel`.
    // NOTE(review): presumably `cancel` tears down this mode, which is why the
    // allocator and the query are copied first — confirm.
    const allocator = self.allocator;
    const query = try allocator.dupe(u8, self.input_.items);
    defer allocator.free(query);
    const next_mode: Buffer.FindMode = switch (self.find_mode) {
        .auto, .case_folded => .exact,
        .exact => .case_folded,
    };
    self.cancel();
    // Errors from re-entering find are ignored.
    command.executeName("find", command.fmt(.{ next_mode, query })) catch {};
}
pub const toggle_find_mode_meta: Meta = .{ .description = "Toggle find mode" };
pub fn mini_mode_reset(self: *Self, _: Ctx) Result {
self.input_.clearRetainingCapacity();
self.update_mini_mode_text();