From 7471848ef2ee242600749342e66867cb6ef83576 Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Sun, 15 Feb 2026 21:51:38 +0100 Subject: [PATCH 1/2] fix: switch_case does nothing if there are non-cased chars in range --- build.zig.zon | 4 ++-- src/buffer/unicode.zig | 27 ++++++++++++++++++++++----- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index 77dbd65..60a5116 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -30,8 +30,8 @@ .hash = "fuzzig-0.1.1-Ji0xivxIAQBD0g8O_NV_0foqoPf3elsg9Sc3pNfdVH4D", }, .vaxis = .{ - .url = "git+https://github.com/neurocyte/libvaxis?ref=main#95034c7114601178467b42e69588cdd4c1d39eb1", - .hash = "vaxis-0.5.1-BWNV_PJJCQArtOy_n76jGWoqoBpnM5FKElA_i1IaYYcF", + .url = "git+https://github.com/neurocyte/libvaxis?ref=main#4fc5a651a85574c3dfdddebc1d9038fd6bb4e067", + .hash = "vaxis-0.5.1-BWNV_EBKCQDqoQZvUE22SBMKYTlZtIIEdLx2X8CfRDHf", }, .zeit = .{ .url = "git+https://github.com/rockorager/zeit?ref=zig-0.15#ed2ca60db118414bda2b12df2039e33bad3b0b88", diff --git a/src/buffer/unicode.zig b/src/buffer/unicode.zig index 4346f84..e329707 100644 --- a/src/buffer/unicode.zig +++ b/src/buffer/unicode.zig @@ -150,12 +150,14 @@ fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator return result.toOwnedSlice(); } -fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) bool { +fn utf8_predicate_all(comptime field: uucode.FieldEnum, text: []const u8) bool { const view: Utf8View = .initUnchecked(text); var it = view.iterator(); while (it.nextCodepoint()) |cp| { const result = switch (field) { .is_lowercase => uucode.get(field, cp), + .changes_when_casefolded => uucode.get(field, cp), + .changes_when_lowercased => uucode.get(field, cp), else => @compileError(@tagName(field) ++ " is not a unicode predicate"), }; if (!result) return false; @@ -163,6 +165,21 @@ fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) bool { return true; } +fn utf8_predicate_any(comptime field: uucode.FieldEnum, text: []const u8) bool { + const view: Utf8View = .initUnchecked(text); + var it = view.iterator(); + while (it.nextCodepoint()) |cp| { + const result = switch (field) { + .is_lowercase => uucode.get(field, cp), + .changes_when_casefolded => uucode.get(field, cp), + .changes_when_lowercased => uucode.get(field, cp), + else => @compileError(@tagName(field) ++ " is not a unicode predicate"), + }; + if (result) return true; + } + return false; +} + pub fn to_upper(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 { return utf8_transform(.simple_uppercase_mapping, allocator, text); } @@ -184,14 +201,14 @@ pub fn case_folded_write_partial(writer: *std.Io.Writer, text: []const u8) Trans } pub fn switch_case(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 { - return if (utf8_predicate(.is_lowercase, text)) - to_upper(allocator, text) + return if (utf8_predicate_any(.changes_when_lowercased, text)) + to_lower(allocator, text) else - to_lower(allocator, text); + to_upper(allocator, text); } pub fn is_lowercase(text: []const u8) bool { - return utf8_predicate(.is_lowercase, text); + return utf8_predicate_all(.is_lowercase, text); } const std = @import("std"); From a81f5c61d09b6faea3683410b5d73c4c431c785a Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Sun, 15 Feb 2026 21:52:58 +0100 Subject: [PATCH 2/2] refactor: de-duplicate utf8_write_transform & utf8_partial_write_transform --- src/buffer/unicode.zig | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/buffer/unicode.zig b/src/buffer/unicode.zig index e329707..eca8f5e 100644 --- a/src/buffer/unicode.zig +++ b/src/buffer/unicode.zig @@ -112,8 +112,8 @@ pub const TransformError = error{ WriteFailed, }; -fn utf8_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError!void { - const view: Utf8View = .initUnchecked(text); +fn utf8_write_transform_T(comptime View: anytype, comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError!@typeInfo(@TypeOf(View.iterator)).@"fn".return_type.? { + const view: View = .initUnchecked(text); var it = view.iterator(); while (it.nextCodepoint()) |cp| { const cp_ = switch (field) { @@ -125,21 +125,15 @@ fn utf8_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer const size = try utf8Encode(cp_, &utf8_buf); try writer.writeAll(utf8_buf[0..size]); } + return it; +} + +fn utf8_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError!void { + _ = try utf8_write_transform_T(Utf8View, field, writer, text); } fn utf8_partial_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError![]const u8 { - const view: Utf8PartialView = .initUnchecked(text); - var it = view.iterator(); - while (it.nextCodepoint()) |cp| { - const cp_ = switch (field) { - .simple_uppercase_mapping, .simple_lowercase_mapping => uucode.get(field, cp) orelse cp, - .case_folding_simple => uucode.get(field, cp), - else => @compileError(@tagName(field) ++ " is not a unicode transformation"), - }; - var utf8_buf: [6]u8 = undefined; - const size = try utf8Encode(cp_, &utf8_buf); - try writer.writeAll(utf8_buf[0..size]); - } + const it = try utf8_write_transform_T(Utf8PartialView, field, writer, text); return text[0..it.end]; }