switch_case already uses changes_when_lowercased to detect uppercase characters. toggle_case was using is_lowercase with inverted branch order, which is functionally equivalent but inconsistent.
271 lines
8.2 KiB
Zig
271 lines
8.2 KiB
Zig
pub fn control_code_to_unicode(code: u8) [:0]const u8 {
|
||
return switch (code) {
|
||
'\x00' => "␀",
|
||
'\x01' => "␁",
|
||
'\x02' => "␂",
|
||
'\x03' => "␃",
|
||
'\x04' => "␄",
|
||
'\x05' => "␅",
|
||
'\x06' => "␆",
|
||
'\x07' => "␇",
|
||
'\x08' => "␈",
|
||
'\x09' => "␉",
|
||
'\x0A' => "␊",
|
||
'\x0B' => "␋",
|
||
'\x0C' => "␌",
|
||
'\x0D' => "␍",
|
||
'\x0E' => "␎",
|
||
'\x0F' => "␏",
|
||
'\x10' => "␐",
|
||
'\x11' => "␑",
|
||
'\x12' => "␒",
|
||
'\x13' => "␓",
|
||
'\x14' => "␔",
|
||
'\x15' => "␕",
|
||
'\x16' => "␖",
|
||
'\x17' => "␗",
|
||
'\x18' => "␘",
|
||
'\x19' => "␙",
|
||
'\x1A' => "␚",
|
||
'\x1B' => "␛",
|
||
'\x1C' => "␜",
|
||
'\x1D' => "␝",
|
||
'\x1E' => "␞",
|
||
'\x1F' => "␟",
|
||
'\x20' => "␠",
|
||
'\x7F' => "␡",
|
||
else => "",
|
||
};
|
||
}
|
||
|
||
pub const char_pairs = [_]struct { []const u8, []const u8 }{
|
||
.{ "\"", "\"" },
|
||
.{ "'", "'" },
|
||
.{ "`", "`" },
|
||
.{ "(", ")" },
|
||
.{ "[", "]" },
|
||
.{ "{", "}" },
|
||
.{ "‘", "’" },
|
||
.{ "“", "”" },
|
||
.{ "‚", "‘" },
|
||
.{ "«", "»" },
|
||
.{ "¿", "?" },
|
||
.{ "¡", "!" },
|
||
};
|
||
|
||
pub const open_close_pairs = [_]struct { []const u8, []const u8 }{
|
||
.{ "(", ")" },
|
||
.{ "[", "]" },
|
||
.{ "{", "}" },
|
||
.{ "‘", "’" },
|
||
.{ "“", "”" },
|
||
.{ "«", "»" },
|
||
.{ "¿", "?" },
|
||
.{ "¡", "!" },
|
||
};
|
||
|
||
const spinner = [_][]const u8{
|
||
"⠋",
|
||
"⠙",
|
||
"⠹",
|
||
"⠸",
|
||
"⠼",
|
||
"⠴",
|
||
"⠦",
|
||
"⠧",
|
||
"⠇",
|
||
};
|
||
|
||
const spinner_short = [_][]const u8{
|
||
"⠋",
|
||
"⠙",
|
||
"⠸",
|
||
"⠴",
|
||
"⠦",
|
||
"⠇",
|
||
};
|
||
|
||
fn raw_byte_to_utf8(cp: u8, buf: []u8) ![]const u8 {
|
||
var utf16le: [1]u16 = undefined;
|
||
const utf16le_as_bytes = std.mem.sliceAsBytes(utf16le[0..]);
|
||
std.mem.writeInt(u16, utf16le_as_bytes[0..2], cp, .little);
|
||
return buf[0..try utf16LeToUtf8(buf, &utf16le)];
|
||
}
|
||
|
||
pub fn utf8_sanitize(allocator: std.mem.Allocator, input: []const u8) error{
|
||
OutOfMemory,
|
||
DanglingSurrogateHalf,
|
||
ExpectedSecondSurrogateHalf,
|
||
UnexpectedSecondSurrogateHalf,
|
||
}![]u8 {
|
||
var output: std.ArrayListUnmanaged(u8) = .{};
|
||
const writer = output.writer(allocator);
|
||
var buf: [4]u8 = undefined;
|
||
for (input) |byte| try writer.writeAll(try raw_byte_to_utf8(byte, &buf));
|
||
return output.toOwnedSlice(allocator);
|
||
}
|
||
|
||
pub const TransformError = error{
|
||
OutOfMemory,
|
||
Utf8CannotEncodeSurrogateHalf,
|
||
CodepointTooLarge,
|
||
WriteFailed,
|
||
};
|
||
|
||
fn utf8_write_transform_T(comptime View: anytype, comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError!@typeInfo(@TypeOf(View.iterator)).@"fn".return_type.? {
|
||
const view: View = .initUnchecked(text);
|
||
var it = view.iterator();
|
||
while (it.nextCodepoint()) |cp| {
|
||
const cp_ = switch (field) {
|
||
.simple_uppercase_mapping, .simple_lowercase_mapping => uucode.get(field, cp) orelse cp,
|
||
.case_folding_simple => uucode.get(field, cp),
|
||
else => @compileError(@tagName(field) ++ " is not a unicode transformation"),
|
||
};
|
||
var utf8_buf: [6]u8 = undefined;
|
||
const size = try utf8Encode(cp_, &utf8_buf);
|
||
try writer.writeAll(utf8_buf[0..size]);
|
||
}
|
||
return it;
|
||
}
|
||
|
||
fn utf8_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError!void {
|
||
_ = try utf8_write_transform_T(Utf8View, field, writer, text);
|
||
}
|
||
|
||
fn utf8_partial_write_transform(comptime field: uucode.FieldEnum, writer: *std.Io.Writer, text: []const u8) TransformError![]const u8 {
|
||
const it = try utf8_write_transform_T(Utf8PartialView, field, writer, text);
|
||
return text[0..it.end];
|
||
}
|
||
|
||
fn utf8_transform(comptime field: uucode.FieldEnum, allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
|
||
var result: std.Io.Writer.Allocating = .init(allocator);
|
||
defer result.deinit();
|
||
try utf8_write_transform(field, &result.writer, text);
|
||
return result.toOwnedSlice();
|
||
}
|
||
|
||
fn utf8_predicate_all(comptime field: uucode.FieldEnum, text: []const u8) bool {
|
||
const view: Utf8View = .initUnchecked(text);
|
||
var it = view.iterator();
|
||
while (it.nextCodepoint()) |cp| {
|
||
const result = switch (field) {
|
||
.is_lowercase => uucode.get(field, cp),
|
||
.changes_when_casefolded => uucode.get(field, cp),
|
||
.changes_when_lowercased => uucode.get(field, cp),
|
||
else => @compileError(@tagName(field) ++ " is not a unicode predicate"),
|
||
};
|
||
if (!result) return false;
|
||
}
|
||
return true;
|
||
}
|
||
|
||
fn utf8_predicate_any(comptime field: uucode.FieldEnum, text: []const u8) bool {
|
||
const view: Utf8View = .initUnchecked(text);
|
||
var it = view.iterator();
|
||
while (it.nextCodepoint()) |cp| {
|
||
const result = switch (field) {
|
||
.is_lowercase => uucode.get(field, cp),
|
||
.changes_when_casefolded => uucode.get(field, cp),
|
||
.changes_when_lowercased => uucode.get(field, cp),
|
||
else => @compileError(@tagName(field) ++ " is not a unicode predicate"),
|
||
};
|
||
if (result) return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
pub fn to_upper(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
|
||
return utf8_transform(.simple_uppercase_mapping, allocator, text);
|
||
}
|
||
|
||
pub fn to_lower(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
|
||
return utf8_transform(.simple_lowercase_mapping, allocator, text);
|
||
}
|
||
|
||
pub fn case_fold(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
|
||
return utf8_transform(.case_folding_simple, allocator, text);
|
||
}
|
||
|
||
pub fn case_folded_write(writer: *std.Io.Writer, text: []const u8) TransformError!void {
|
||
return utf8_write_transform(.case_folding_simple, writer, text);
|
||
}
|
||
|
||
pub fn case_folded_write_partial(writer: *std.Io.Writer, text: []const u8) TransformError![]const u8 {
|
||
return utf8_partial_write_transform(.case_folding_simple, writer, text);
|
||
}
|
||
|
||
pub fn switch_case(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
|
||
return if (utf8_predicate_any(.changes_when_lowercased, text))
|
||
to_lower(allocator, text)
|
||
else
|
||
to_upper(allocator, text);
|
||
}
|
||
|
||
pub fn toggle_case(allocator: std.mem.Allocator, text: []const u8) TransformError![]u8 {
|
||
var result: std.Io.Writer.Allocating = .init(allocator);
|
||
defer result.deinit();
|
||
const writer = &result.writer;
|
||
const view: Utf8View = .initUnchecked(text);
|
||
var it = view.iterator();
|
||
while (it.nextCodepoint()) |cp| {
|
||
const cp_ = if (uucode.get(.changes_when_lowercased, cp))
|
||
uucode.get(.simple_lowercase_mapping, cp) orelse cp
|
||
else
|
||
uucode.get(.simple_uppercase_mapping, cp) orelse cp;
|
||
var utf8_buf: [6]u8 = undefined;
|
||
const size = try utf8Encode(cp_, &utf8_buf);
|
||
try writer.writeAll(utf8_buf[0..size]);
|
||
}
|
||
return result.toOwnedSlice();
|
||
}
|
||
|
||
pub fn is_lowercase(text: []const u8) bool {
|
||
return utf8_predicate_all(.is_lowercase, text);
|
||
}
|
||
|
||
const std = @import("std");
|
||
const uucode = @import("vaxis").uucode;
|
||
|
||
const utf16LeToUtf8 = std.unicode.utf16LeToUtf8;
|
||
const utf8ByteSequenceLength = std.unicode.utf8ByteSequenceLength;
|
||
const utf8Decode = std.unicode.utf8Decode;
|
||
const utf8Encode = std.unicode.utf8Encode;
|
||
const Utf8View = std.unicode.Utf8View;
|
||
|
||
const Utf8PartialIterator = struct {
|
||
bytes: []const u8,
|
||
end: usize,
|
||
|
||
fn nextCodepointSlice(it: *Utf8PartialIterator) ?[]const u8 {
|
||
if (it.end >= it.bytes.len) {
|
||
return null;
|
||
}
|
||
|
||
const cp_len = utf8ByteSequenceLength(it.bytes[it.end]) catch return null;
|
||
if (it.end + cp_len > it.bytes.len) {
|
||
return null;
|
||
}
|
||
it.end += cp_len;
|
||
return it.bytes[it.end - cp_len .. it.end];
|
||
}
|
||
|
||
fn nextCodepoint(it: *Utf8PartialIterator) ?u21 {
|
||
const slice = it.nextCodepointSlice() orelse return null;
|
||
return utf8Decode(slice) catch unreachable;
|
||
}
|
||
};
|
||
|
||
const Utf8PartialView = struct {
|
||
bytes: []const u8,
|
||
|
||
fn initUnchecked(s: []const u8) Utf8PartialView {
|
||
return Utf8PartialView{ .bytes = s };
|
||
}
|
||
|
||
fn iterator(s: Utf8PartialView) Utf8PartialIterator {
|
||
return Utf8PartialIterator{
|
||
.bytes = s.bytes,
|
||
.end = 0,
|
||
};
|
||
}
|
||
};
|