From 37428bd698b35b6e2fa6907c6179d3baeda778e1 Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Tue, 25 Nov 2025 21:42:51 +0100 Subject: [PATCH 1/8] refactor: add unicode.case_folded_write --- src/buffer/unicode.zig | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/buffer/unicode.zig b/src/buffer/unicode.zig index c16c1b3..8be7026 100644 --- a/src/buffer/unicode.zig +++ b/src/buffer/unicode.zig @@ -113,9 +113,7 @@ pub const TransformError = error{ WriteFailed, }; -fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 { - var result: std.Io.Writer.Allocating = .init(allocator); - defer result.deinit(); +fn utf8_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError!void { const view: std.unicode.Utf8View = try .init(text); var it = view.iterator(); while (it.nextCodepoint()) |cp| { @@ -126,8 +124,14 @@ fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator }; var utf8_buf: [6]u8 = undefined; const size = try std.unicode.utf8Encode(cp_, &utf8_buf); - try result.writer.writeAll(utf8_buf[0..size]); + try writer.writeAll(utf8_buf[0..size]); } +} + +fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 { + var result: std.Io.Writer.Allocating = .init(allocator); + defer result.deinit(); + try utf8_write_transform(field, &result.writer, text); return result.toOwnedSlice(); } @@ -162,6 +166,10 @@ pub fn case_fold(allocator: std.mem.Allocator, text: []const u8) TransformError! return utf8_transform(.case_folding_simple, allocator, text); } +pub fn case_folded_write(writer: *std.Io.Writer, text: []const u8) TransformError!void { + return utf8_write_transform(.case_folding_simple, writer, text); +} + pub fn switch_case(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 { return if (try utf8_predicate(.is_lowercase, text)) to_upper(allocator, text) From 3ebe68a384fb599711d1924653e1b843b5455c35 Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Tue, 25 Nov 2025 21:43:12 +0100 Subject: [PATCH 2/8] refactor: implement find_all_ranges mode .case_folded --- src/buffer/Buffer.zig | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/buffer/Buffer.zig b/src/buffer/Buffer.zig index 80470d9..202007a 100644 --- a/src/buffer/Buffer.zig +++ b/src/buffer/Buffer.zig @@ -947,6 +947,7 @@ const Node = union(enum) { pub const FindAllCallback = fn (data: *anyopaque, begin_row: usize, begin_col: usize, end_row: usize, end_col: usize) error{Stop}!void; pub fn find_all_ranges(self: *const Node, pattern: []const u8, data: *anyopaque, callback: *const FindAllCallback, mode: FindMode, allocator: Allocator) error{ OutOfMemory, Stop }!void { const Ctx = struct { + allocator: std.mem.Allocator, pattern: []const u8, data: *anyopaque, callback: *const FindAllCallback, @@ -985,7 +986,11 @@ const Node = union(enum) { input = input[input_consume_size..]; }, .case_folded => { - @panic("unimplemented"); + const input_consume_size = @min(ctx.buf.len - ctx.rest.len, input.len); + var writer = std.Io.Writer.fixed(ctx.buf[ctx.rest.len..]); + unicode.case_folded_write(&writer, input[0..input_consume_size]) catch return error.WriteFailed; + ctx.rest = ctx.buf[0 .. ctx.rest.len + writer.end]; + input = input[input_consume_size..]; }, } @@ -1030,6 +1035,7 @@ const Node = union(enum) { } }; var ctx: Ctx = .{ + .allocator = allocator, .pattern = pattern, .data = data, .callback = callback, From 64d95ee00942fd2dbb6602102afdbb68338b04e2 Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Tue, 25 Nov 2025 21:50:36 +0100 Subject: [PATCH 3/8] refactor: move FindMode to Buffer --- src/buffer/Buffer.zig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/buffer/Buffer.zig b/src/buffer/Buffer.zig index 202007a..3173a1c 100644 --- a/src/buffer/Buffer.zig +++ b/src/buffer/Buffer.zig @@ -17,6 +17,8 @@ pub const Cursor = @import("Cursor.zig"); pub const View = @import("View.zig"); pub const Selection = @import("Selection.zig"); +pub const FindMode = enum { exact, case_folded }; + pub const Metrics = struct { ctx: *const anyopaque, egc_length: egc_length_func, @@ -943,7 +945,6 @@ const Node = union(enum) { } } - pub const FindMode = enum { exact, case_folded }; pub const FindAllCallback = fn (data: *anyopaque, begin_row: usize, begin_col: usize, end_row: usize, end_col: usize) error{Stop}!void; pub fn find_all_ranges(self: *const Node, pattern: []const u8, data: *anyopaque, callback: *const FindAllCallback, mode: FindMode, allocator: Allocator) error{ OutOfMemory, Stop }!void { const Ctx = struct { From 047409cd29c3978b271b4a59fdd05315578625c9 Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Tue, 25 Nov 2025 21:51:23 +0100 Subject: [PATCH 4/8] feat: add find_mode parameter to Editor.find_query --- src/tui/editor.zig | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/tui/editor.zig b/src/tui/editor.zig index 6ecdc91..7e988b8 100644 --- a/src/tui/editor.zig +++ b/src/tui/editor.zig @@ -5218,12 +5218,16 @@ pub const Editor = struct { pub fn find_query(self: *Self, ctx: Context) Result { var query: []const u8 = undefined; var match_type: Match.Type = undefined; + var find_mode: Buffer.FindMode = .exact; if (ctx.args.match(.{tp.extract(&query)}) catch false) { self.match_type = .find; - try self.find_in_buffer(query, .none); + try self.find_in_buffer(query, .none, find_mode); self.clamp(); } else if (ctx.args.match(.{ tp.extract(&query), tp.extract(&match_type) }) catch false) { - try self.find_in_buffer(query, match_type); + try self.find_in_buffer(query, match_type, find_mode); + self.clamp(); + } else if (ctx.args.match(.{ tp.extract(&query), tp.extract(&match_type), tp.extract(&find_mode) }) catch false) { + try self.find_in_buffer(query, match_type, find_mode); self.clamp(); } else return error.InvalidFindQueryArgument; } @@ -5233,7 +5237,7 @@ pub const Editor = struct { _ = ctx; const query: []const u8 = try self.copy_word_at_cursor(self.allocator); defer self.allocator.free(query); - try self.find_in_buffer(query, .find); + try self.find_in_buffer(query, .find, .exact); } pub const find_word_at_cursor_meta: Meta = .{ .description = "Search for the word under the cursor" }; @@ -5274,13 +5278,13 @@ pub const Editor = struct { } else self.last_find_query = self.allocator.dupe(u8, query) catch return; } - pub fn find_in_buffer(self: *Self, query: []const u8, match_type: Match.Type) !void { + pub fn find_in_buffer(self: *Self, query: []const u8, match_type: Match.Type, find_mode: Buffer.FindMode) !void { self.set_last_find_query(query, match_type); self.match_type = match_type; - return self.find_in_buffer_sync(query); + return self.find_in_buffer_sync(query, find_mode); } - fn find_in_buffer_sync(self: *Self, query: []const u8) !void { + fn find_in_buffer_sync(self: *Self, query: []const u8, mode: Buffer.FindMode) !void { const Ctx = struct { matches: usize = 0, self: *Self, @@ -5296,7 +5300,7 @@ pub const Editor = struct { defer self.add_match_done(); var ctx: Ctx = .{ .self = self }; self.init_matches_update(); - try root.find_all_ranges(query, &ctx, Ctx.cb, .exact, self.allocator); + try root.find_all_ranges(query, &ctx, Ctx.cb, mode, self.allocator); } fn find_in_buffer_async(self: *Self, query: []const u8) !void { @@ -5511,7 +5515,7 @@ pub const Editor = struct { if (self.matches.items.len == 0) { if (self.last_find_query) |last| { self.find_operation = .goto_next_match; - try self.find_in_buffer(last, self.last_find_query_match_type); + try self.find_in_buffer(last, self.last_find_query_match_type, .exact); } } try self.move_cursor_next_match(ctx); @@ -5540,7 +5544,7 @@ pub const Editor = struct { if (self.matches.items.len == 0) { if (self.last_find_query) |last| { self.find_operation = .goto_prev_match; - try self.find_in_buffer(last, self.last_find_query_match_type); + try self.find_in_buffer(last, self.last_find_query_match_type, .exact); } } try self.move_cursor_prev_match(ctx); From abee93d366b0c5d941575a9c2406d1554f9c4769 Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Tue, 25 Nov 2025 22:35:14 +0100 Subject: [PATCH 5/8] refactor: add unicode.is_lowercase --- src/buffer/unicode.zig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/buffer/unicode.zig b/src/buffer/unicode.zig index 8be7026..2da6d18 100644 --- a/src/buffer/unicode.zig +++ b/src/buffer/unicode.zig @@ -135,7 +135,7 @@ fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator return result.toOwnedSlice(); } -fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) TransformError!bool { +fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) error{InvalidUtf8}!bool { const view: std.unicode.Utf8View = try .init(text); var it = view.iterator(); while (it.nextCodepoint()) |cp| { @@ -177,5 +177,9 @@ pub fn switch_case(allocator: std.mem.Allocator, text: []const u8) TransformErro to_lower(allocator, text); } +pub fn is_lowercase(text: []const u8) error{InvalidUtf8}!bool { + return try utf8_predicate(.is_lowercase, text); +} + const std = @import("std"); const uucode = @import("vaxis").uucode; From 2520a37a9076a29897bc79b68a56ef7583406e80 Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Tue, 25 Nov 2025 22:48:11 +0100 Subject: [PATCH 6/8] feat: add case auto and case insensitive mode to find --- src/keybind/builtin/flow.json | 1 + src/tui/mode/mini/find.zig | 45 +++++++++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/keybind/builtin/flow.json b/src/keybind/builtin/flow.json index 989ad24..264a25b 100644 --- a/src/keybind/builtin/flow.json +++ b/src/keybind/builtin/flow.json @@ -558,6 +558,7 @@ ["ctrl+space", "mini_mode_cancel"], ["ctrl+enter", "mini_mode_insert_bytes", "\n"], ["ctrl+backspace", "mini_mode_reset"], + ["alt+c", "toggle_find_mode"], ["alt+v", "system_paste"], ["alt+n", "goto_next_match"], ["alt+p", "goto_prev_match"], diff --git a/src/tui/mode/mini/find.zig b/src/tui/mode/mini/find.zig index 45a139b..813a0e9 100644 --- a/src/tui/mode/mini/find.zig +++ b/src/tui/mode/mini/find.zig @@ -1,9 +1,11 @@ const tp = @import("thespian"); +const cbor = @import("cbor"); const input = @import("input"); const keybind = @import("keybind"); const command = @import("command"); const EventHandler = @import("EventHandler"); +const Buffer = @import("Buffer"); const tui = @import("../../tui.zig"); const ed = @import("../../editor.zig"); @@ -14,11 +16,17 @@ const ArrayList = @import("std").ArrayList; const Self = @This(); const name = "󱎸 find"; +const name_auto = name; +const name_exact = name ++ "  "; +const name_case_folded = name ++ "  "; const Commands = command.Collection(cmds); +const Mode = enum { auto, exact, case_folded }; + allocator: Allocator, input_: ArrayList(u8), +find_mode: Mode = .auto, last_input: ArrayList(u8), start_view: ed.View, start_cursor: ed.Cursor, @@ -26,7 +34,7 @@ editor: *ed.Editor, history_pos: ?usize = null, commands: Commands = undefined, -pub fn create(allocator: Allocator, _: command.Context) !struct { tui.Mode, tui.MiniMode } { +pub fn create(allocator: Allocator, ctx: command.Context) !struct { tui.Mode, tui.MiniMode } { const editor = tui.get_active_editor() orelse return error.NotFound; const self = try allocator.create(Self); errdefer allocator.destroy(self); @@ -39,7 +47,11 @@ pub fn create(allocator: Allocator, _: command.Context) !struct { tui.Mode, tui. .editor = editor, }; try self.commands.init(self); - if (editor.get_primary().selection) |sel| ret: { + _ = ctx.args.match(.{cbor.extract(&self.find_mode)}) catch {}; + var query: []const u8 = undefined; + if (ctx.args.match(.{ cbor.extract(&self.find_mode), cbor.extract(&query) }) catch false) { + try self.input_.appendSlice(self.allocator, query); + } else if (editor.get_primary().selection) |sel| ret: { const text = editor.get_selection(sel, self.allocator) catch break :ret; defer self.allocator.free(text); try self.input_.appendSlice(self.allocator, text); @@ -48,7 +60,11 @@ pub fn create(allocator: Allocator, _: command.Context) !struct { tui.Mode, tui. .insert_command = "mini_mode_insert_bytes", }); mode.event_handler = EventHandler.to_owned(self); - return .{ mode, .{ .name = name } }; + return .{ mode, .{ .name = switch (self.find_mode) { + .auto => name_auto, + .exact => name_exact, + .case_folded => name_case_folded, + } } }; } pub fn deinit(self: *Self) void { @@ -91,13 +107,21 @@ fn flush_input(self: *Self) !void { const primary = self.editor.get_primary(); primary.selection = null; primary.cursor = self.start_cursor; - try self.editor.find_in_buffer(self.input_.items, .find); + try self.editor.find_in_buffer(self.input_.items, .find, switch (self.find_mode) { + .auto => self.auto_detect_mode(), + .exact => .exact, + .case_folded => .case_folded, + }); } else { self.editor.get_primary().selection = null; self.editor.init_matches_update(); } } +fn auto_detect_mode(self: *Self) Buffer.FindMode { + return if (Buffer.unicode.is_lowercase(self.input_.items) catch return .exact) .case_folded else .exact; +} + fn cmd(self: *Self, name_: []const u8, ctx: command.Context) tp.result { self.flush_input() catch {}; return command.executeName(name_, ctx); @@ -153,6 +177,19 @@ const cmds = struct { const Meta = command.Metadata; const Result = command.Result; + pub fn toggle_find_mode(self: *Self, _: Ctx) Result { + const new_find_mode: Buffer.FindMode = switch (self.find_mode) { + .exact => .case_folded, + .auto, .case_folded => .exact, + }; + const allocator = self.allocator; + const query = try allocator.dupe(u8, self.input_.items); + defer allocator.free(query); + self.cancel(); + command.executeName("find", command.fmt(.{ new_find_mode, query })) catch {}; + } + pub const toggle_find_mode_meta: Meta = .{ .description = "Toggle find mode" }; + pub fn mini_mode_reset(self: *Self, _: Ctx) Result { self.input_.clearRetainingCapacity(); self.update_mini_mode_text(); From 68b17301cd68e4a6149faff16b4d66ea896d167f Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Wed, 26 Nov 2025 09:33:04 +0100 Subject: [PATCH 7/8] refactor: use unchecked Utf8View in utf8 transformation Internally we use only validated utf8 and unchecked performs much better and reduces unused error values. --- src/buffer/unicode.zig | 21 +++++++-------------- src/tui/mode/mini/find.zig | 2 +- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/src/buffer/unicode.zig b/src/buffer/unicode.zig index 2da6d18..036cfd8 100644 --- a/src/buffer/unicode.zig +++ b/src/buffer/unicode.zig @@ -106,7 +106,6 @@ pub fn utf8_sanitize(allocator: std.mem.Allocator, input: []const u8) error{ } pub const TransformError = error{ - InvalidUtf8, OutOfMemory, Utf8CannotEncodeSurrogateHalf, CodepointTooLarge, @@ -114,7 +113,7 @@ pub const TransformError = error{ }; fn utf8_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError!void { - const view: std.unicode.Utf8View = try .init(text); + const view: std.unicode.Utf8View = .initUnchecked(text); var it = view.iterator(); while (it.nextCodepoint()) |cp| { const cp_ = switch (field) { @@ -135,8 +134,8 @@ fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator return result.toOwnedSlice(); } -fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) error{InvalidUtf8}!bool { - const view: std.unicode.Utf8View = try .init(text); +fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) bool { + const view: std.unicode.Utf8View = .initUnchecked(text); var it = view.iterator(); while (it.nextCodepoint()) |cp| { const result = switch (field) { @@ -148,13 +147,7 @@ fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) error{Inva return true; } -pub fn to_upper(allocator: std.mem.Allocator, text: []const u8) error{ - InvalidUtf8, - OutOfMemory, - Utf8CannotEncodeSurrogateHalf, - CodepointTooLarge, - WriteFailed, -}![]u8 { +pub fn to_upper(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 { return utf8_transform(.simple_uppercase_mapping, allocator, text); } @@ -171,14 +164,14 @@ pub fn case_folded_write(writer: *std.Io.Writer, text: []const u8) TransformErro } pub fn switch_case(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 { - return if (try utf8_predicate(.is_lowercase, text)) + return if (utf8_predicate(.is_lowercase, text)) to_upper(allocator, text) else to_lower(allocator, text); } -pub fn is_lowercase(text: []const u8) error{InvalidUtf8}!bool { - return try utf8_predicate(.is_lowercase, text); +pub fn is_lowercase(text: []const u8) bool { + return utf8_predicate(.is_lowercase, text); } const std = @import("std"); diff --git a/src/tui/mode/mini/find.zig b/src/tui/mode/mini/find.zig index 813a0e9..b5f05c4 100644 --- a/src/tui/mode/mini/find.zig +++ b/src/tui/mode/mini/find.zig @@ -119,7 +119,7 @@ fn flush_input(self: *Self) !void { } fn auto_detect_mode(self: *Self) Buffer.FindMode { - return if (Buffer.unicode.is_lowercase(self.input_.items) catch return .exact) .case_folded else .exact; + return if (Buffer.unicode.is_lowercase(self.input_.items)) .case_folded else .exact; } fn cmd(self: *Self, name_: []const u8, ctx: command.Context) tp.result { From 99f9f95dbcc4a3f4cf7c171e7e1216a5622b79da Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Wed, 26 Nov 2025 09:56:39 +0100 Subject: [PATCH 8/8] fix: use a partial write capable case folding writer in Buffer.find_all_ranges This fixes case insensitive search. Previously the case folding would fail on input slices that contain partial utf8 sequences, which is normal in the buffer write process design. Now these partial utf8 sequences are not consumed and instead pushed to the next write call where they will be completed from the main buffer contents. --- src/buffer/Buffer.zig | 6 ++-- src/buffer/unicode.zig | 72 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 71 insertions(+), 7 deletions(-) diff --git a/src/buffer/Buffer.zig b/src/buffer/Buffer.zig index 3173a1c..bcf1b4b 100644 --- a/src/buffer/Buffer.zig +++ b/src/buffer/Buffer.zig @@ -989,9 +989,9 @@ const Node = union(enum) { .case_folded => { const input_consume_size = @min(ctx.buf.len - ctx.rest.len, input.len); var writer = std.Io.Writer.fixed(ctx.buf[ctx.rest.len..]); - unicode.case_folded_write(&writer, input[0..input_consume_size]) catch return error.WriteFailed; - ctx.rest = ctx.buf[0 .. ctx.rest.len + writer.end]; - input = input[input_consume_size..]; + const folded = unicode.case_folded_write_partial(&writer, input[0..input_consume_size]) catch return error.WriteFailed; + ctx.rest = ctx.buf[0 .. ctx.rest.len + folded.len]; + input = input[folded.len..]; }, } diff --git a/src/buffer/unicode.zig b/src/buffer/unicode.zig index 036cfd8..db24595 100644 --- a/src/buffer/unicode.zig +++ b/src/buffer/unicode.zig @@ -89,7 +89,7 @@ fn raw_byte_to_utf8(cp: u8, buf: []u8) ![]const u8 { var utf16le: [1]u16 = undefined; const utf16le_as_bytes = std.mem.sliceAsBytes(utf16le[0..]); std.mem.writeInt(u16, utf16le_as_bytes[0..2], cp, .little); - return buf[0..try std.unicode.utf16LeToUtf8(buf, &utf16le)]; + return buf[0..try utf16LeToUtf8(buf, &utf16le)]; } pub fn utf8_sanitize(allocator: std.mem.Allocator, input: []const u8) error{ @@ -113,7 +113,7 @@ pub const TransformError = error{ }; fn utf8_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError!void { - const view: std.unicode.Utf8View = .initUnchecked(text); + const view: Utf8View = .initUnchecked(text); var it = view.iterator(); while (it.nextCodepoint()) |cp| { const cp_ = switch (field) { @@ -122,11 +122,27 @@ fn utf8_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer else => @compileError(@tagName(field) ++ " is not a unicode transformation"), }; var utf8_buf: [6]u8 = undefined; - const size = try std.unicode.utf8Encode(cp_, &utf8_buf); + const size = try utf8Encode(cp_, &utf8_buf); try writer.writeAll(utf8_buf[0..size]); } } +fn utf8_partial_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError![]const u8 { + const view: Utf8PartialView = .initUnchecked(text); + var it = view.iterator(); + while (it.nextCodepoint()) |cp| { + const cp_ = switch (field) { + .simple_uppercase_mapping, .simple_lowercase_mapping => uucode.get(field, cp) orelse cp, + .case_folding_simple => uucode.get(field, cp), + else => @compileError(@tagName(field) ++ " is not a unicode transformation"), + }; + var utf8_buf: [6]u8 = undefined; + const size = try utf8Encode(cp_, &utf8_buf); + try writer.writeAll(utf8_buf[0..size]); + } + return text[0..it.end]; +} + fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 { var result: std.Io.Writer.Allocating = .init(allocator); defer result.deinit(); @@ -135,7 +151,7 @@ fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator } fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) bool { - const view: std.unicode.Utf8View = .initUnchecked(text); + const view: Utf8View = .initUnchecked(text); var it = view.iterator(); while (it.nextCodepoint()) |cp| { const result = switch (field) { @@ -163,6 +179,10 @@ pub fn case_folded_write(writer: *std.Io.Writer, text: []const u8) TransformErro return utf8_write_transform(.case_folding_simple, writer, text); } +pub fn case_folded_write_partial(writer: *std.Io.Writer, text: []const u8) TransformError![]const u8 { + return utf8_partial_write_transform(.case_folding_simple, writer, text); +} + pub fn switch_case(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 { return if (utf8_predicate(.is_lowercase, text)) to_upper(allocator, text) @@ -176,3 +196,47 @@ pub fn is_lowercase(text: []const u8) bool { const std = @import("std"); const uucode = @import("vaxis").uucode; + +const utf16LeToUtf8 = std.unicode.utf16LeToUtf8; +const utf8ByteSequenceLength = std.unicode.utf8ByteSequenceLength; +const utf8Decode = std.unicode.utf8Decode; +const utf8Encode = std.unicode.utf8Encode; +const Utf8View = std.unicode.Utf8View; + +const Utf8PartialIterator = struct { + bytes: []const u8, + end: usize, + + fn nextCodepointSlice(it: *Utf8PartialIterator) ?[]const u8 { + if (it.end >= it.bytes.len) { + return null; + } + + const cp_len = utf8ByteSequenceLength(it.bytes[it.end]) catch unreachable; + if (it.end + cp_len > it.bytes.len) { + return null; + } + it.end += cp_len; + return it.bytes[it.end - cp_len .. it.end]; + } + + fn nextCodepoint(it: *Utf8PartialIterator) ?u21 { + const slice = it.nextCodepointSlice() orelse return null; + return utf8Decode(slice) catch unreachable; + } +}; + +const Utf8PartialView = struct { + bytes: []const u8, + + fn initUnchecked(s: []const u8) Utf8PartialView { + return Utf8PartialView{ .bytes = s }; + } + + fn iterator(s: Utf8PartialView) Utf8PartialIterator { + return Utf8PartialIterator{ + .bytes = s.bytes, + .end = 0, + }; + } +};