flow/src/buffer/unicode.zig

185 lines
5.1 KiB
Zig
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/// Maps the byte `code` to a sentinel-terminated string literal.
///
/// There is an explicit arm for every byte from 0x00 through 0x20 and for
/// 0x7F; every other byte takes the `else` arm.
///
/// NOTE(review): every literal in this switch reads as an empty string in this
/// copy of the file. Presumably each arm holds a visible stand-in glyph for the
/// control code that is not rendered here — verify against the repository
/// before editing or collapsing any arm.
pub fn control_code_to_unicode(code: u8) [:0]const u8 {
return switch (code) {
'\x00' => "",
'\x01' => "",
'\x02' => "",
'\x03' => "",
'\x04' => "",
'\x05' => "",
'\x06' => "",
'\x07' => "",
'\x08' => "",
'\x09' => "",
'\x0A' => "",
'\x0B' => "",
'\x0C' => "",
'\x0D' => "",
'\x0E' => "",
'\x0F' => "",
'\x10' => "",
'\x11' => "",
'\x12' => "",
'\x13' => "",
'\x14' => "",
'\x15' => "",
'\x16' => "",
'\x17' => "",
'\x18' => "",
'\x19' => "",
'\x1A' => "",
'\x1B' => "",
'\x1C' => "",
'\x1D' => "",
'\x1E' => "",
'\x1F' => "",
// 0x20 (space) and 0x7F (DEL) get their own arms in addition to 0x00-0x1F.
'\x20' => "",
'\x7F' => "",
// Fallback for every byte without a dedicated arm.
else => "",
};
}
/// Table of two-element string tuples, each giving a first string and the
/// string it is paired with.
///
/// The visible entries cover quote characters that pair with themselves,
/// brackets, guillemets and inverted punctuation.
///
/// NOTE(review): three entries read as pairs of empty strings in this copy of
/// the file; presumably they hold characters that are not rendered here —
/// verify against the repository before editing.
pub const char_pairs = [_]struct { []const u8, []const u8 }{
// Quote characters whose second element equals the first.
.{ "\"", "\"" },
.{ "'", "'" },
.{ "`", "`" },
// Bracket pairs.
.{ "(", ")" },
.{ "[", "]" },
.{ "{", "}" },
// NOTE(review): contents not visible in this view.
.{ "", "" },
.{ "", "" },
.{ "", "" },
.{ "«", "»" },
.{ "¿", "?" },
.{ "¡", "!" },
};
/// Table of two-element string tuples whose visible entries all have distinct
/// first and second elements (brackets, guillemets, inverted punctuation).
/// Unlike `char_pairs`, it has no `"`, `'` or backtick entries.
///
/// NOTE(review): two entries read as pairs of empty strings in this copy of
/// the file; presumably they hold characters that are not rendered here —
/// verify against the repository before editing.
pub const open_close_pairs = [_]struct { []const u8, []const u8 }{
.{ "(", ")" },
.{ "[", "]" },
.{ "{", "}" },
// NOTE(review): contents not visible in this view.
.{ "", "" },
.{ "", "" },
.{ "«", "»" },
.{ "¿", "?" },
.{ "¡", "!" },
};
/// File-private array of nine strings.
///
/// NOTE(review): presumably the frames of a progress spinner, judging by the
/// name only. Every literal reads as an empty string in this copy of the file,
/// so the real contents are probably glyphs that are not rendered here —
/// verify against the repository before editing.
const spinner = [_][]const u8{
"",
"",
"",
"",
"",
"",
"",
"",
"",
};
/// File-private array of six strings.
///
/// NOTE(review): presumably a shorter variant of `spinner`, judging by the
/// name only. Every literal reads as an empty string in this copy of the file,
/// so the real contents are probably glyphs that are not rendered here —
/// verify against the repository before editing.
const spinner_short = [_][]const u8{
"",
"",
"",
"",
"",
"",
};
/// Encodes the byte `cp`, taken as a code point in the range 0-255, as UTF-8
/// into `buf` and returns the written prefix of `buf`.
///
/// The value is widened to a single UTF-16 code unit and handed to
/// `std.unicode.utf16LeToUtf8`, whose error set this function inherits.
/// `buf` must be large enough for the encoded form.
fn raw_byte_to_utf8(cp: u8, buf: []u8) ![]const u8 {
    // utf16LeToUtf8 expects little-endian code units regardless of host order.
    const units = [1]u16{std.mem.nativeToLittle(u16, cp)};
    const written = try std.unicode.utf16LeToUtf8(buf, &units);
    return buf[0..written];
}
/// Re-encodes `input` byte by byte as UTF-8, treating every byte as the code
/// point with the same numeric value (see `raw_byte_to_utf8`).
///
/// Returns a newly allocated slice; the caller owns it and must free it with
/// `allocator`. An empty `input` yields an empty slice.
///
/// Errors: `error.OutOfMemory` when the output cannot grow; the surrogate
/// errors are propagated from the UTF-16 conversion in `raw_byte_to_utf8`.
pub fn utf8_sanitize(allocator: std.mem.Allocator, input: []const u8) error{
    OutOfMemory,
    DanglingSurrogateHalf,
    ExpectedSecondSurrogateHalf,
    UnexpectedSecondSurrogateHalf,
}![]u8 {
    var output: std.ArrayListUnmanaged(u8) = .empty;
    // Release the partially built output if any append or the final
    // toOwnedSlice fails; without this the buffer leaks on the error path.
    errdefer output.deinit(allocator);
    var buf: [4]u8 = undefined;
    for (input) |byte| try output.appendSlice(allocator, try raw_byte_to_utf8(byte, &buf));
    return output.toOwnedSlice(allocator);
}
/// Error set shared by the case-transformation functions in this file
/// (`to_lower`, `case_fold`, `case_folded_write`, `switch_case` and the
/// private `utf8_write_transform` / `utf8_transform` helpers).
pub const TransformError = error{
// The input text failed UTF-8 validation.
InvalidUtf8,
// Allocating the output failed.
OutOfMemory,
// The next two are propagated from `std.unicode.utf8Encode`.
Utf8CannotEncodeSurrogateHalf,
CodepointTooLarge,
// Propagated from `std.Io.Writer.writeAll`.
WriteFailed,
};
/// Streams `text` to `writer` with every code point mapped through the
/// comptime-selected `field`.
///
/// For the simple upper/lower mappings, a code point with no mapping is
/// written unchanged; for simple case folding the lookup result is written as
/// returned. Any other `field` is rejected at compile time.
///
/// `text` is validated as UTF-8 before the loop starts, so invalid input fails
/// with `error.InvalidUtf8` before anything is written.
fn utf8_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError!void {
    // Scratch space for one encoded code point, reused across iterations.
    var encoded: [6]u8 = undefined;
    var codepoints = (try std.unicode.Utf8View.init(text)).iterator();
    while (codepoints.nextCodepoint()) |original| {
        // `field` is comptime-known, so only the matching arm is analyzed.
        const mapped = switch (field) {
            .case_folding_simple => uucode.get(field, original),
            .simple_uppercase_mapping, .simple_lowercase_mapping => uucode.get(field, original) orelse original,
            else => @compileError(@tagName(field) ++ " is not a unicode transformation"),
        };
        const len = try std.unicode.utf8Encode(mapped, &encoded);
        try writer.writeAll(encoded[0..len]);
    }
}
/// Applies the comptime-selected `field` transformation to `text` and returns
/// the result as a newly allocated slice owned by the caller.
///
/// The output is collected in an allocating writer backed by `allocator`; the
/// writer is torn down on every exit path.
fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    var collector = std.Io.Writer.Allocating.init(allocator);
    defer collector.deinit();

    const sink: *std.Io.Writer = &collector.writer;
    try utf8_write_transform(field, sink, text);

    const owned = try collector.toOwnedSlice();
    return owned;
}
/// Reports whether the comptime-selected boolean property `field` holds for
/// every code point of `text`.
///
/// Returns `true` for empty text and `false` as soon as one code point fails
/// the property. Only `.is_lowercase` is accepted; any other `field` is a
/// compile error. Fails with `error.InvalidUtf8` when `text` is not valid UTF-8.
fn utf8_predicate(comptime field: uucode.FieldEnum, text: []const u8) error{InvalidUtf8}!bool {
    var codepoints = (try std.unicode.Utf8View.init(text)).iterator();
    while (codepoints.nextCodepoint()) |codepoint| {
        const holds = switch (field) {
            .is_lowercase => uucode.get(field, codepoint),
            else => @compileError(@tagName(field) ++ " is not a unicode predicate"),
        };
        if (holds) continue;
        return false;
    }
    return true;
}
/// Returns a newly allocated copy of `text` in which every code point is
/// replaced by its simple uppercase mapping; code points without a mapping are
/// copied unchanged. The caller owns the returned slice.
///
/// Uses the shared `TransformError` set, like `to_lower` and `case_fold`,
/// instead of repeating the same members inline.
pub fn to_upper(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    return utf8_transform(.simple_uppercase_mapping, allocator, text);
}
/// Returns a newly allocated copy of `text` in which every code point is
/// replaced by its simple lowercase mapping; code points without a mapping are
/// copied unchanged. The caller owns the returned slice.
pub fn to_lower(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    const lowered = try utf8_transform(.simple_lowercase_mapping, allocator, text);
    return lowered;
}
/// Returns a newly allocated copy of `text` with simple case folding applied
/// to every code point. The caller owns the returned slice.
pub fn case_fold(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    const folded = try utf8_transform(.case_folding_simple, allocator, text);
    return folded;
}
/// Writes `text` to `writer` with simple case folding applied to every code
/// point. This function does not allocate for the output itself; it goes
/// straight to the supplied writer.
pub fn case_folded_write(writer: *std.Io.Writer, text: []const u8) TransformError!void {
    try utf8_write_transform(.case_folding_simple, writer, text);
}
/// Toggles the case of `text`: when every code point satisfies the
/// `is_lowercase` property (including when `text` is empty) the result is
/// `to_upper(text)`, otherwise it is `to_lower(text)`.
/// The caller owns the returned slice.
pub fn switch_case(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    const all_lowercase = try utf8_predicate(.is_lowercase, text);
    if (all_lowercase) return to_upper(allocator, text);
    return to_lower(allocator, text);
}
/// Reports whether every code point of `text` has the `is_lowercase`
/// property. Fails with `error.InvalidUtf8` when `text` is not valid UTF-8.
pub fn is_lowercase(text: []const u8) error{InvalidUtf8}!bool {
    const all_lowercase = try utf8_predicate(.is_lowercase, text);
    return all_lowercase;
}
const std = @import("std");
const uucode = @import("vaxis").uucode;