flow/src/buffer/unicode.zig

pub fn control_code_to_unicode(code: u8) [:0]const u8 {
    return switch (code) {
        '\x00' => "␀",
        '\x01' => "␁",
        '\x02' => "␂",
        '\x03' => "␃",
        '\x04' => "␄",
        '\x05' => "␅",
        '\x06' => "␆",
        '\x07' => "␇",
        '\x08' => "␈",
        '\x09' => "␉",
        '\x0A' => "␊",
        '\x0B' => "␋",
        '\x0C' => "␌",
        '\x0D' => "␍",
        '\x0E' => "␎",
        '\x0F' => "␏",
        '\x10' => "␐",
        '\x11' => "␑",
        '\x12' => "␒",
        '\x13' => "␓",
        '\x14' => "␔",
        '\x15' => "␕",
        '\x16' => "␖",
        '\x17' => "␗",
        '\x18' => "␘",
        '\x19' => "␙",
        '\x1A' => "␚",
        '\x1B' => "␛",
        '\x1C' => "␜",
        '\x1D' => "␝",
        '\x1E' => "␞",
        '\x1F' => "␟",
        '\x20' => "␠",
        '\x7F' => "␡",
        else => "",
    };
}

pub const char_pairs = [_]struct { []const u8, []const u8 }{
    .{ "\"", "\"" },
    .{ "'", "'" },
    .{ "`", "`" },
    .{ "(", ")" },
    .{ "[", "]" },
    .{ "{", "}" },
    .{ "‘", "’" },
    .{ "“", "”" },
    .{ "‚", "‘" },
    .{ "«", "»" },
    .{ "¿", "?" },
    .{ "¡", "!" },
};

pub const open_close_pairs = [_]struct { []const u8, []const u8 }{
    .{ "(", ")" },
    .{ "[", "]" },
    .{ "{", "}" },
    .{ "‘", "’" },
    .{ "“", "”" },
    .{ "«", "»" },
    .{ "¿", "?" },
    .{ "¡", "!" },
};

const spinner = [_][]const u8{
    "⠋",
    "⠙",
    "⠹",
    "⠸",
    "⠼",
    "⠴",
    "⠦",
    "⠧",
    "⠇",
};

const spinner_short = [_][]const u8{
    "⠋",
    "⠙",
    "⠸",
    "⠴",
    "⠦",
    "⠇",
};

fn raw_byte_to_utf8(cp: u8, buf: []u8) ![]const u8 {
    var utf16le: [1]u16 = undefined;
    const utf16le_as_bytes = std.mem.sliceAsBytes(utf16le[0..]);
    std.mem.writeInt(u16, utf16le_as_bytes[0..2], cp, .little);
    return buf[0..try utf16LeToUtf8(buf, &utf16le)];
}

pub fn utf8_sanitize(allocator: std.mem.Allocator, input: []const u8) error{
    OutOfMemory,
    DanglingSurrogateHalf,
    ExpectedSecondSurrogateHalf,
    UnexpectedSecondSurrogateHalf,
}![]u8 {
    var output: std.ArrayListUnmanaged(u8) = .{};
    const writer = output.writer(allocator);
    var buf: [4]u8 = undefined;
    for (input) |byte| try writer.writeAll(try raw_byte_to_utf8(byte, &buf));
    return output.toOwnedSlice(allocator);
}

pub const TransformError = error{
    OutOfMemory,
    Utf8CannotEncodeSurrogateHalf,
    CodepointTooLarge,
    WriteFailed,
};

fn utf8_write_transform_T(comptime View: anytype, comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError!@typeInfo(@TypeOf(View.iterator)).@"fn".return_type.? {
    const view: View = .initUnchecked(text);
    var it = view.iterator();
    while (it.nextCodepoint()) |cp| {
        const cp_ = switch (field) {
            .simple_uppercase_mapping, .simple_lowercase_mapping => uucode.get(field, cp) orelse cp,
            .case_folding_simple => uucode.get(field, cp),
            else => @compileError(@tagName(field) ++ " is not a unicode transformation"),
        };
        var utf8_buf: [6]u8 = undefined;
        const size = try utf8Encode(cp_, &utf8_buf);
        try writer.writeAll(utf8_buf[0..size]);
    }
    return it;
}

fn utf8_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError!void {
    _ = try utf8_write_transform_T(Utf8View, field, writer, text);
}

fn utf8_partial_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError![]const u8 {
    const it = try utf8_write_transform_T(Utf8PartialView, field, writer, text);
    return text[0..it.end];
}

fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    var result: std.Io.Writer.Allocating = .init(allocator);
    defer result.deinit();
    try utf8_write_transform(field, &result.writer, text);
    return result.toOwnedSlice();
}

fn utf8_predicate_all(comptime field: uucode.FieldEnum, text: []const u8) bool {
    const view: Utf8View = .initUnchecked(text);
    var it = view.iterator();
    while (it.nextCodepoint()) |cp| {
        const result = switch (field) {
            .is_lowercase => uucode.get(field, cp),
            .changes_when_casefolded => uucode.get(field, cp),
            .changes_when_lowercased => uucode.get(field, cp),
            else => @compileError(@tagName(field) ++ " is not a unicode predicate"),
        };
        if (!result) return false;
    }
    return true;
}

fn utf8_predicate_any(comptime field: uucode.FieldEnum, text: []const u8) bool {
    const view: Utf8View = .initUnchecked(text);
    var it = view.iterator();
    while (it.nextCodepoint()) |cp| {
        const result = switch (field) {
            .is_lowercase => uucode.get(field, cp),
            .changes_when_casefolded => uucode.get(field, cp),
            .changes_when_lowercased => uucode.get(field, cp),
            else => @compileError(@tagName(field) ++ " is not a unicode predicate"),
        };
        if (result) return true;
    }
    return false;
}

pub fn to_upper(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    return utf8_transform(.simple_uppercase_mapping, allocator, text);
}

pub fn to_lower(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    return utf8_transform(.simple_lowercase_mapping, allocator, text);
}

pub fn case_fold(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    return utf8_transform(.case_folding_simple, allocator, text);
}

pub fn case_folded_write(writer: *std.Io.Writer, text: []const u8) TransformError!void {
    return utf8_write_transform(.case_folding_simple, writer, text);
}

pub fn case_folded_write_partial(writer: *std.Io.Writer, text: []const u8) TransformError![]const u8 {
    return utf8_partial_write_transform(.case_folding_simple, writer, text);
}

pub fn switch_case(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    return if (utf8_predicate_any(.changes_when_lowercased, text))
        to_lower(allocator, text)
    else
        to_upper(allocator, text);
}

pub fn toggle_case(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
    var result: std.Io.Writer.Allocating = .init(allocator);
    defer result.deinit();
    const writer = &result.writer;
    const view: Utf8View = .initUnchecked(text);
    var it = view.iterator();
    while (it.nextCodepoint()) |cp| {
        const cp_ = if (uucode.get(.changes_when_lowercased, cp))
            uucode.get(.simple_lowercase_mapping, cp) orelse cp
        else
            uucode.get(.simple_uppercase_mapping, cp) orelse cp;
        var utf8_buf: [6]u8 = undefined;
        const size = try utf8Encode(cp_, &utf8_buf);
        try writer.writeAll(utf8_buf[0..size]);
    }
    return result.toOwnedSlice();
}

pub fn is_lowercase(text: []const u8) bool {
    return utf8_predicate_all(.is_lowercase, text);
}

const std = @import("std");
const uucode = @import("vaxis").uucode;

const utf16LeToUtf8 = std.unicode.utf16LeToUtf8;
const utf8ByteSequenceLength = std.unicode.utf8ByteSequenceLength;
const utf8Decode = std.unicode.utf8Decode;
const utf8Encode = std.unicode.utf8Encode;
const Utf8View = std.unicode.Utf8View;

const Utf8PartialIterator = struct {
    bytes: []const u8,
    end: usize,

    fn nextCodepointSlice(it: *Utf8PartialIterator) ?[]const u8 {
        if (it.end >= it.bytes.len) {
            return null;
        }

        const cp_len = utf8ByteSequenceLength(it.bytes[it.end]) catch return null;
        if (it.end + cp_len > it.bytes.len) {
            return null;
        }
        it.end += cp_len;
        return it.bytes[it.end - cp_len .. it.end];
    }

    fn nextCodepoint(it: *Utf8PartialIterator) ?u21 {
        const slice = it.nextCodepointSlice() orelse return null;
        return utf8Decode(slice) catch unreachable;
    }
};

const Utf8PartialView = struct {
    bytes: []const u8,

    fn initUnchecked(s: []const u8) Utf8PartialView {
        return Utf8PartialView{ .bytes = s };
    }

    fn iterator(s: Utf8PartialView) Utf8PartialIterator {
        return Utf8PartialIterator{
            .bytes = s.bytes,
            .end = 0,
        };
    }
};