feat: port to_upper and to_lower to uucode
This commit is contained in:
parent
2ff0521040
commit
e35a0555f1
3 changed files with 90 additions and 64 deletions
|
|
@ -324,11 +324,6 @@ pub fn build_exe(
|
||||||
.root_source_file = b.path("src/tracy_noop.zig"),
|
.root_source_file = b.path("src/tracy_noop.zig"),
|
||||||
});
|
});
|
||||||
|
|
||||||
const zg_dep = b.dependency("zg", .{
|
|
||||||
.target = target,
|
|
||||||
.optimize = optimize,
|
|
||||||
});
|
|
||||||
|
|
||||||
const zeit_dep = b.dependency("zeit", .{
|
const zeit_dep = b.dependency("zeit", .{
|
||||||
.target = target,
|
.target = target,
|
||||||
.optimize = optimize,
|
.optimize = optimize,
|
||||||
|
|
@ -424,7 +419,7 @@ pub fn build_exe(
|
||||||
.imports = &.{
|
.imports = &.{
|
||||||
.{ .name = "cbor", .module = cbor_mod },
|
.{ .name = "cbor", .module = cbor_mod },
|
||||||
.{ .name = "thespian", .module = thespian_mod },
|
.{ .name = "thespian", .module = thespian_mod },
|
||||||
.{ .name = "LetterCasing", .module = zg_dep.module("LetterCasing") },
|
.{ .name = "vaxis", .module = vaxis_mod },
|
||||||
.{ .name = "file_type_config", .module = file_type_config_mod },
|
.{ .name = "file_type_config", .module = file_type_config_mod },
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -30,8 +30,8 @@
|
||||||
.hash = "fuzzig-0.1.1-Ji0xivxIAQBD0g8O_NV_0foqoPf3elsg9Sc3pNfdVH4D",
|
.hash = "fuzzig-0.1.1-Ji0xivxIAQBD0g8O_NV_0foqoPf3elsg9Sc3pNfdVH4D",
|
||||||
},
|
},
|
||||||
.vaxis = .{
|
.vaxis = .{
|
||||||
.url = "git+https://github.com/neurocyte/libvaxis?ref=main#78aff0865f0f6206ece9263dcd9b051352da1e82",
|
.url = "git+https://github.com/neurocyte/libvaxis?ref=main#164dfadbad9a5df529ade8e3f0e5c52939d27e81",
|
||||||
.hash = "vaxis-0.5.1-BWNV_BkyCQAo2UBeY_gbzP09d4pHDaqtg44snF5k3mh2",
|
.hash = "vaxis-0.5.1-BWNV_M4yCQBLJYI9ooJL-8dstYnId58coiBeHhF4m9wz",
|
||||||
},
|
},
|
||||||
.zeit = .{
|
.zeit = .{
|
||||||
.url = "git+https://github.com/rockorager/zeit?ref=zig-0.15#ed2ca60db118414bda2b12df2039e33bad3b0b88",
|
.url = "git+https://github.com/rockorager/zeit?ref=zig-0.15#ed2ca60db118414bda2b12df2039e33bad3b0b88",
|
||||||
|
|
@ -42,10 +42,6 @@
|
||||||
.hash = "zigwin32-25.0.28-preview-AAAAAICM5AMResOGQnQ85mfe60TTOQeMtt7GRATUOKoP",
|
.hash = "zigwin32-25.0.28-preview-AAAAAICM5AMResOGQnQ85mfe60TTOQeMtt7GRATUOKoP",
|
||||||
.lazy = true,
|
.lazy = true,
|
||||||
},
|
},
|
||||||
.zg = .{
|
|
||||||
.url = "git+https://codeberg.org/neurocyte/zg?ref=master#cdcab8b9ea3458efd710008055d993c5dbdb1af7",
|
|
||||||
.hash = "zg-0.15.2-oGqU3AtAtAI7gs7zPvzg2_TlVIqi9wCNEw7DLvD5OvDN",
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
.paths = .{
|
.paths = .{
|
||||||
"include",
|
"include",
|
||||||
|
|
|
||||||
|
|
@ -64,58 +64,6 @@ pub const open_close_pairs = [_]struct { []const u8, []const u8 }{
|
||||||
.{ "¡", "!" },
|
.{ "¡", "!" },
|
||||||
};
|
};
|
||||||
|
|
||||||
fn raw_byte_to_utf8(cp: u8, buf: []u8) ![]const u8 {
|
|
||||||
var utf16le: [1]u16 = undefined;
|
|
||||||
const utf16le_as_bytes = std.mem.sliceAsBytes(utf16le[0..]);
|
|
||||||
std.mem.writeInt(u16, utf16le_as_bytes[0..2], cp, .little);
|
|
||||||
return buf[0..try std.unicode.utf16LeToUtf8(buf, &utf16le)];
|
|
||||||
}
|
|
||||||
|
|
||||||
const std = @import("std");
|
|
||||||
|
|
||||||
pub fn utf8_sanitize(allocator: std.mem.Allocator, input: []const u8) error{
|
|
||||||
OutOfMemory,
|
|
||||||
DanglingSurrogateHalf,
|
|
||||||
ExpectedSecondSurrogateHalf,
|
|
||||||
UnexpectedSecondSurrogateHalf,
|
|
||||||
}![]u8 {
|
|
||||||
var output: std.ArrayListUnmanaged(u8) = .{};
|
|
||||||
const writer = output.writer(allocator);
|
|
||||||
var buf: [4]u8 = undefined;
|
|
||||||
for (input) |byte| try writer.writeAll(try raw_byte_to_utf8(byte, &buf));
|
|
||||||
return output.toOwnedSlice(allocator);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub const LetterCasing = @import("LetterCasing");
|
|
||||||
var letter_casing: ?LetterCasing = null;
|
|
||||||
var letter_casing_arena = std.heap.ArenaAllocator.init(std.heap.c_allocator);
|
|
||||||
|
|
||||||
fn get_letter_casing() *LetterCasing {
|
|
||||||
if (letter_casing) |*cd| return cd;
|
|
||||||
letter_casing = LetterCasing.init(letter_casing_arena.allocator()) catch @panic("LetterCasing.init");
|
|
||||||
return &letter_casing.?;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn to_upper(allocator: std.mem.Allocator, text: []const u8) error{ OutOfMemory, Utf8CannotEncodeSurrogateHalf, CodepointTooLarge }![]u8 {
|
|
||||||
return get_letter_casing().toUpperStr(allocator, text);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn to_lower(allocator: std.mem.Allocator, text: []const u8) error{ OutOfMemory, Utf8CannotEncodeSurrogateHalf, CodepointTooLarge }![]u8 {
|
|
||||||
return get_letter_casing().toLowerStr(allocator, text);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn case_fold(allocator: std.mem.Allocator, text: []const u8) error{ OutOfMemory, Utf8CannotEncodeSurrogateHalf, CodepointTooLarge }![]u8 {
|
|
||||||
return get_letter_casing().toLowerStr(allocator, text);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn switch_case(allocator: std.mem.Allocator, text: []const u8) error{ OutOfMemory, Utf8CannotEncodeSurrogateHalf, CodepointTooLarge }![]u8 {
|
|
||||||
const letter_casing_ = get_letter_casing();
|
|
||||||
return if (letter_casing_.isLowerStr(text))
|
|
||||||
letter_casing_.toUpperStr(allocator, text)
|
|
||||||
else
|
|
||||||
letter_casing_.toLowerStr(allocator, text);
|
|
||||||
}
|
|
||||||
|
|
||||||
const spinner = [_][]const u8{
|
const spinner = [_][]const u8{
|
||||||
"⠋",
|
"⠋",
|
||||||
"⠙",
|
"⠙",
|
||||||
|
|
@ -136,3 +84,90 @@ const spinner_short = [_][]const u8{
|
||||||
"⠦",
|
"⠦",
|
||||||
"⠇",
|
"⠇",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Encodes a single raw byte as UTF-8, using the byte's numeric value as the
/// value of one UTF-16 code unit.
///
/// The encoded bytes are written to the start of `buf` and returned as a
/// slice of `buf`; nothing is allocated. Errors are whatever
/// `std.unicode.utf16LeToUtf8` reports.
fn raw_byte_to_utf8(cp: u8, buf: []u8) ![]const u8 {
    // `utf16LeToUtf8` consumes little-endian code units regardless of the
    // host byte order, so convert explicitly.
    const code_unit = [1]u16{std.mem.nativeToLittle(u16, cp)};
    const len = try std.unicode.utf16LeToUtf8(buf, &code_unit);
    return buf[0..len];
}
|
||||||
|
|
||||||
|
/// Builds a UTF-8 string from arbitrary bytes by encoding every input byte
/// individually with `raw_byte_to_utf8` and concatenating the results.
///
/// Returns a newly allocated slice owned by the caller (free it with
/// `allocator`). If an error occurs, the partially built output is released
/// before the error is returned.
pub fn utf8_sanitize(allocator: std.mem.Allocator, input: []const u8) error{
    OutOfMemory,
    DanglingSurrogateHalf,
    ExpectedSecondSurrogateHalf,
    UnexpectedSecondSurrogateHalf,
}![]u8 {
    var output: std.ArrayListUnmanaged(u8) = .empty;
    // Without this, a failed append (or a failed `toOwnedSlice`) would leak
    // the bytes accumulated so far.
    errdefer output.deinit(allocator);

    // Scratch space for one encoded byte; reused on every iteration.
    var buf: [4]u8 = undefined;
    for (input) |byte| {
        try output.appendSlice(allocator, try raw_byte_to_utf8(byte, &buf));
    }
    return output.toOwnedSlice(allocator);
}
|
||||||
|
|
||||||
|
/// Errors returned by the UTF-8 case helpers in this file (`utf8_transform`,
/// `utf8_predicate` and the public wrappers built on them).
pub const TransformError = error{
    /// The input text failed `std.unicode.Utf8View.init` validation.
    InvalidUtf8,
    /// Allocating the output buffer failed.
    OutOfMemory,
    /// Reported by `std.unicode.utf8Encode` for a surrogate-half code point.
    Utf8CannotEncodeSurrogateHalf,
    /// Reported by `std.unicode.utf8Encode` for an out-of-range code point.
    CodepointTooLarge,
    /// Writing to the output writer failed.
    WriteFailed,
};
|
||||||
|
|
||||||
|
/// Maps every code point of `text` through the uucode table selected by
/// `field` and returns the re-encoded result as a newly allocated slice
/// owned by the caller.
///
/// `field` must be `.simple_uppercase_mapping`, `.simple_lowercase_mapping`
/// or `.case_folding_simple`; any other field is rejected at compile time.
/// Returns `error.InvalidUtf8` when `text` is not valid UTF-8.
fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    const view = try std.unicode.Utf8View.init(text);

    var out: std.Io.Writer.Allocating = .init(allocator);
    // Runs on every exit path, including after `toOwnedSlice` has handed the
    // buffer to the caller.
    defer out.deinit();

    var encoded: [6]u8 = undefined;
    var codepoints = view.iterator();
    while (codepoints.nextCodepoint()) |source| {
        const mapped = switch (field) {
            // The lookup is optional for these two fields; keep the code
            // point unchanged when it yields null.
            .simple_uppercase_mapping, .simple_lowercase_mapping => uucode.get(field, source) orelse source,
            .case_folding_simple => uucode.get(field, source),
            else => @compileError(@tagName(field) ++ " is not a unicode transformation"),
        };
        const len = try std.unicode.utf8Encode(mapped, &encoded);
        try out.writer.writeAll(encoded[0..len]);
    }
    return out.toOwnedSlice();
}
|
||||||
|
|
||||||
|
/// Reports whether every code point of `text` satisfies the uucode boolean
/// property selected by `field`.
///
/// Only `.is_lowercase` is accepted; any other field is rejected at compile
/// time. Empty text yields `true`. Returns `error.InvalidUtf8` when `text` is
/// not valid UTF-8.
///
/// NOTE(review): a single code point for which the property is false makes
/// the whole result false. Presumably that includes uncased characters such
/// as spaces and digits; confirm this is the intended meaning.
fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) TransformError!bool {
    const view = try std.unicode.Utf8View.init(text);
    var codepoints = view.iterator();
    return while (codepoints.nextCodepoint()) |cp| {
        const holds = switch (field) {
            .is_lowercase => uucode.get(field, cp),
            else => @compileError(@tagName(field) ++ " is not a unicode predicate"),
        };
        if (!holds) break false;
    } else true;
}
|
||||||
|
|
||||||
|
/// Returns a newly allocated copy of `text` in which every code point has
/// been passed through `utf8_transform` with `.simple_uppercase_mapping`.
///
/// The caller owns the returned slice and frees it with `allocator`.
/// Returns `error.InvalidUtf8` when `text` is not valid UTF-8.
///
/// The error set is `TransformError`, the same alias used by `to_lower`,
/// `case_fold` and `switch_case`. It is member-for-member identical to the
/// inline set previously written here.
pub fn to_upper(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    return utf8_transform(.simple_uppercase_mapping, allocator, text);
}
|
||||||
|
|
||||||
|
/// Returns a newly allocated copy of `text` in which every code point has
/// been passed through `utf8_transform` with `.simple_lowercase_mapping`.
///
/// The caller owns the returned slice and frees it with `allocator`.
/// Returns `error.InvalidUtf8` when `text` is not valid UTF-8.
pub fn to_lower(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    const mapping: uucode.FieldEnum = .simple_lowercase_mapping;
    return utf8_transform(mapping, allocator, text);
}
|
||||||
|
|
||||||
|
/// Returns a newly allocated copy of `text` in which every code point has
/// been passed through `utf8_transform` with `.case_folding_simple`.
///
/// The caller owns the returned slice and frees it with `allocator`.
/// Returns `error.InvalidUtf8` when `text` is not valid UTF-8.
pub fn case_fold(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    const mapping: uucode.FieldEnum = .case_folding_simple;
    return utf8_transform(mapping, allocator, text);
}
|
||||||
|
|
||||||
|
/// Toggles the case of `text`: when `utf8_predicate(.is_lowercase, …)` holds
/// for the whole text the result of `to_upper` is returned, otherwise the
/// result of `to_lower`.
///
/// The caller owns the returned slice and frees it with `allocator`.
/// Returns `error.InvalidUtf8` when `text` is not valid UTF-8.
///
/// NOTE(review): the predicate requires every code point to be lowercase, so
/// text containing any code point that is not lowercase takes the `to_lower`
/// branch. Presumably that includes lowercase words separated by spaces;
/// confirm this is intended.
pub fn switch_case(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    const all_lowercase = try utf8_predicate(.is_lowercase, text);
    if (all_lowercase) return to_upper(allocator, text);
    return to_lower(allocator, text);
}
|
||||||
|
|
||||||
|
const std = @import("std");
|
||||||
|
const uucode = @import("vaxis").uucode;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue