feat: sanitize non utf-8 and display a status bar warning
This commit is contained in:
parent
e865a89ede
commit
c0a9be21f5
5 changed files with 79 additions and 20 deletions
|
@ -24,7 +24,6 @@ pub const Metrics = struct {
|
|||
pub const egc_length_func = *const fn (self: Metrics, egcs: []const u8, colcount: *c_int, abs_col: usize) usize;
|
||||
pub const egc_chunk_width_func = *const fn (self: Metrics, chunk_: []const u8, abs_col_: usize) usize;
|
||||
pub const egc_last_func = *const fn (self: Metrics, egcs: []const u8) []const u8;
|
||||
|
||||
};
|
||||
|
||||
arena: std.heap.ArenaAllocator,
|
||||
|
@ -38,6 +37,7 @@ last_save: ?Root = null,
|
|||
file_exists: bool = true,
|
||||
file_eol_mode: EolMode = .lf,
|
||||
last_save_eol_mode: EolMode = .lf,
|
||||
file_utf8_sanitized: bool = false,
|
||||
|
||||
undo_history: ?*UndoNode = null,
|
||||
redo_history: ?*UndoNode = null,
|
||||
|
@ -1064,12 +1064,11 @@ fn new_file(self: *const Self, file_exists: *bool) !Root {
|
|||
return Leaf.new(self.allocator, "", true, false);
|
||||
}
|
||||
|
||||
pub fn load(self: *const Self, reader: anytype, size: usize, eol_mode: *EolMode) !Root {
|
||||
pub fn load(self: *const Self, reader: anytype, size: usize, eol_mode: *EolMode, utf8_sanitized: *bool) !Root {
|
||||
const lf = '\n';
|
||||
const cr = '\r';
|
||||
var buf = try self.external_allocator.alloc(u8, size);
|
||||
const self_ = @constCast(self);
|
||||
self_.file_buf = buf;
|
||||
const read_size = try reader.readAll(buf);
|
||||
if (read_size != size)
|
||||
return error.BufferUnderrun;
|
||||
|
@ -1077,6 +1076,14 @@ pub fn load(self: *const Self, reader: anytype, size: usize, eol_mode: *EolMode)
|
|||
if (final_read != 0)
|
||||
@panic("unexpected data in final read");
|
||||
|
||||
if (!std.unicode.utf8ValidateSlice(buf)) {
|
||||
const converted = try unicode.utf8_sanitize(self.external_allocator, buf);
|
||||
self.external_allocator.free(buf);
|
||||
buf = converted;
|
||||
utf8_sanitized.* = true;
|
||||
}
|
||||
self_.file_buf = buf;
|
||||
|
||||
eol_mode.* = .lf;
|
||||
var leaf_count: usize = 1;
|
||||
for (0..buf.len) |i| {
|
||||
|
@ -1107,20 +1114,20 @@ pub fn load(self: *const Self, reader: anytype, size: usize, eol_mode: *EolMode)
|
|||
return Node.merge_in_place(leaves, self.allocator);
|
||||
}
|
||||
|
||||
pub fn load_from_string(self: *const Self, s: []const u8, eol_mode: *EolMode) !Root {
|
||||
pub fn load_from_string(self: *const Self, s: []const u8, eol_mode: *EolMode, utf8_sanitized: *bool) !Root {
|
||||
var stream = std.io.fixedBufferStream(s);
|
||||
return self.load(stream.reader(), s.len, eol_mode);
|
||||
return self.load(stream.reader(), s.len, eol_mode, utf8_sanitized);
|
||||
}
|
||||
|
||||
pub fn load_from_string_and_update(self: *Self, file_path: []const u8, s: []const u8) !void {
|
||||
self.root = try self.load_from_string(s, &self.file_eol_mode);
|
||||
self.root = try self.load_from_string(s, &self.file_eol_mode, &self.file_utf8_sanitized);
|
||||
self.file_path = try self.allocator.dupe(u8, file_path);
|
||||
self.last_save = self.root;
|
||||
self.last_save_eol_mode = self.file_eol_mode;
|
||||
self.file_exists = false;
|
||||
}
|
||||
|
||||
pub fn load_from_file(self: *const Self, file_path: []const u8, file_exists: *bool, eol_mode: *EolMode) !Root {
|
||||
pub fn load_from_file(self: *const Self, file_path: []const u8, file_exists: *bool, eol_mode: *EolMode, utf8_sanitized: *bool) !Root {
|
||||
const file = cwd().openFile(file_path, .{ .mode = .read_only }) catch |e| switch (e) {
|
||||
error.FileNotFound => return self.new_file(file_exists),
|
||||
else => return e,
|
||||
|
@ -1129,17 +1136,19 @@ pub fn load_from_file(self: *const Self, file_path: []const u8, file_exists: *bo
|
|||
file_exists.* = true;
|
||||
defer file.close();
|
||||
const stat = try file.stat();
|
||||
return self.load(file.reader(), @intCast(stat.size), eol_mode);
|
||||
return self.load(file.reader(), @intCast(stat.size), eol_mode, utf8_sanitized);
|
||||
}
|
||||
|
||||
pub fn load_from_file_and_update(self: *Self, file_path: []const u8) !void {
|
||||
var file_exists: bool = false;
|
||||
var eol_mode: EolMode = .lf;
|
||||
self.root = try self.load_from_file(file_path, &file_exists, &eol_mode);
|
||||
var utf8_sanitized: bool = false;
|
||||
self.root = try self.load_from_file(file_path, &file_exists, &eol_mode, &utf8_sanitized);
|
||||
self.file_path = try self.allocator.dupe(u8, file_path);
|
||||
self.last_save = self.root;
|
||||
self.file_exists = file_exists;
|
||||
self.file_eol_mode = eol_mode;
|
||||
self.file_utf8_sanitized = utf8_sanitized;
|
||||
self.last_save_eol_mode = eol_mode;
|
||||
}
|
||||
|
||||
|
@ -1183,6 +1192,7 @@ pub fn store_to_file_and_clean(self: *Self, file_path: []const u8) !void {
|
|||
self.last_save = self.root;
|
||||
self.last_save_eol_mode = self.file_eol_mode;
|
||||
self.file_exists = true;
|
||||
self.file_utf8_sanitized = false;
|
||||
}
|
||||
|
||||
pub fn is_dirty(self: *const Self) bool {
|
||||
|
|
|
@ -37,3 +37,25 @@ pub fn control_code_to_unicode(code: u8) [:0]const u8 {
|
|||
else => "",
|
||||
};
|
||||
}
|
||||
|
||||
/// Encodes the single byte `cp` as UTF-8 into `buf`, treating the byte value
/// as a Unicode code point of the same number.
///
/// The byte is widened to one little-endian UTF-16 code unit and handed to
/// `std.unicode.utf16LeToUtf8`; the returned slice is the prefix of `buf`
/// that was written. Errors from the conversion are propagated unchanged.
fn raw_byte_to_utf8(cp: u8, buf: []u8) ![]const u8 {
    // One UTF-16 code unit, stored little-endian regardless of host byte order.
    const code_units = [1]u16{std.mem.nativeToLittle(u16, cp)};
    const written = try std.unicode.utf16LeToUtf8(buf, &code_units);
    return buf[0..written];
}
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
/// Builds a valid UTF-8 copy of `input` by passing every byte through
/// `raw_byte_to_utf8` and concatenating the results.
///
/// The returned slice is allocated with `allocator` and owned by the caller.
/// If any step fails, the partially built output is released before the
/// error is returned, so nothing allocated here is leaked.
pub fn utf8_sanitize(allocator: std.mem.Allocator, input: []const u8) error{
    OutOfMemory,
    DanglingSurrogateHalf,
    ExpectedSecondSurrogateHalf,
    UnexpectedSecondSurrogateHalf,
}![]u8 {
    var output: std.ArrayListUnmanaged(u8) = .{};
    // Without this, an append that fails part-way would leak what was built so far.
    errdefer output.deinit(allocator);
    // Every input byte produces at least one output byte, so reserve that much up front.
    try output.ensureTotalCapacity(allocator, input.len);
    const writer = output.writer(allocator);
    var buf: [4]u8 = undefined;
    for (input) |byte| try writer.writeAll(try raw_byte_to_utf8(byte, &buf));
    return output.toOwnedSlice(allocator);
}
|
||||
|
|
|
@ -224,6 +224,7 @@ pub const Editor = struct {
|
|||
bytes: usize = 0,
|
||||
chunks: usize = 0,
|
||||
eol_mode: Buffer.EolMode = .lf,
|
||||
utf8_sanitized: bool = false,
|
||||
} = null,
|
||||
matches: Match.List,
|
||||
match_token: usize = 0,
|
||||
|
@ -259,6 +260,7 @@ pub const Editor = struct {
|
|||
cursels: usize = 0,
|
||||
dirty: bool = false,
|
||||
eol_mode: Buffer.EolMode = .lf,
|
||||
utf8_sanitized: bool = false,
|
||||
} = .{},
|
||||
|
||||
syntax: ?*syntax = null,
|
||||
|
@ -414,6 +416,10 @@ pub const Editor = struct {
|
|||
return if (self.buffer) |p| p.file_eol_mode else error.Stop;
|
||||
}
|
||||
|
||||
/// Returns the `file_utf8_sanitized` flag of the attached buffer, or
/// `error.Stop` when `self.buffer` is null.
fn buf_utf8_sanitized(self: *const Self) !bool {
    const buffer = self.buffer orelse return error.Stop;
    return buffer.file_utf8_sanitized;
}
|
||||
|
||||
fn buf_a(self: *const Self) !Allocator {
|
||||
return if (self.buffer) |p| p.allocator else error.Stop;
|
||||
}
|
||||
|
@ -517,6 +523,7 @@ pub const Editor = struct {
|
|||
} else return error.SaveNoFileName;
|
||||
try self.send_editor_save(self.file_path.?);
|
||||
self.last.dirty = false;
|
||||
self.update_event() catch {};
|
||||
}
|
||||
|
||||
fn save_as(self: *Self, file_path: []const u8) !void {
|
||||
|
@ -525,6 +532,7 @@ pub const Editor = struct {
|
|||
self.file_path = try self.allocator.dupe(u8, file_path);
|
||||
try self.send_editor_save(self.file_path.?);
|
||||
self.last.dirty = false;
|
||||
self.update_event() catch {};
|
||||
}
|
||||
|
||||
pub fn push_cursor(self: *Self) !void {
|
||||
|
@ -575,10 +583,10 @@ pub const Editor = struct {
|
|||
|
||||
fn update_buf(self: *Self, root: Buffer.Root) !void {
|
||||
const b = self.buffer orelse return error.Stop;
|
||||
return self.update_buf_and_eol_mode(root, b.file_eol_mode);
|
||||
return self.update_buf_and_eol_mode(root, b.file_eol_mode, b.file_utf8_sanitized);
|
||||
}
|
||||
|
||||
fn update_buf_and_eol_mode(self: *Self, root: Buffer.Root, eol_mode: Buffer.EolMode) !void {
|
||||
fn update_buf_and_eol_mode(self: *Self, root: Buffer.Root, eol_mode: Buffer.EolMode, utf8_sanitized: bool) !void {
|
||||
const b = self.buffer orelse return error.Stop;
|
||||
var sfa = std.heap.stackFallback(512, self.allocator);
|
||||
const allocator = sfa.get();
|
||||
|
@ -587,6 +595,7 @@ pub const Editor = struct {
|
|||
try b.store_undo(meta);
|
||||
b.update(root);
|
||||
b.file_eol_mode = eol_mode;
|
||||
b.file_utf8_sanitized = utf8_sanitized;
|
||||
try self.send_editor_modified();
|
||||
}
|
||||
|
||||
|
@ -1210,13 +1219,14 @@ pub const Editor = struct {
|
|||
|
||||
const root: ?Buffer.Root = self.buf_root() catch null;
|
||||
const eol_mode = self.buf_eol_mode() catch .lf;
|
||||
const utf8_sanitized = self.buf_utf8_sanitized() catch false;
|
||||
if (token_from(self.last.root) != token_from(root)) {
|
||||
try self.send_editor_update(self.last.root, root, eol_mode);
|
||||
self.lsp_version += 1;
|
||||
}
|
||||
|
||||
if (self.last.eol_mode != eol_mode)
|
||||
try self.send_editor_eol_mode(eol_mode);
|
||||
if (self.last.eol_mode != eol_mode or self.last.utf8_sanitized != utf8_sanitized)
|
||||
try self.send_editor_eol_mode(eol_mode, utf8_sanitized);
|
||||
|
||||
if (self.last.dirty != dirty)
|
||||
try self.send_editor_dirty(dirty);
|
||||
|
@ -1254,6 +1264,7 @@ pub const Editor = struct {
|
|||
self.last.dirty = dirty;
|
||||
self.last.root = root;
|
||||
self.last.eol_mode = eol_mode;
|
||||
self.last.utf8_sanitized = utf8_sanitized;
|
||||
}
|
||||
|
||||
fn send_editor_pos(self: *const Self, cursor: *const Cursor) !void {
|
||||
|
@ -1333,8 +1344,8 @@ pub const Editor = struct {
|
|||
project_manager.did_change(file_path, self.lsp_version, token_from(new_root), token_from(old_root), eol_mode) catch {};
|
||||
}
|
||||
|
||||
fn send_editor_eol_mode(self: *const Self, eol_mode: Buffer.EolMode) !void {
|
||||
_ = try self.handlers.msg(.{ "E", "eol_mode", @intFromEnum(eol_mode) });
|
||||
fn send_editor_eol_mode(self: *const Self, eol_mode: Buffer.EolMode, utf8_sanitized: bool) !void {
|
||||
_ = try self.handlers.msg(.{ "E", "eol_mode", @intFromEnum(eol_mode), utf8_sanitized });
|
||||
}
|
||||
|
||||
fn clamp_abs(self: *Self, abs: bool) void {
|
||||
|
@ -4134,7 +4145,7 @@ pub const Editor = struct {
|
|||
self.cancel_all_selections();
|
||||
self.cancel_all_matches();
|
||||
if (state.whole_file) |buf| {
|
||||
state.work_root = try b.load_from_string(buf.items, &state.eol_mode);
|
||||
state.work_root = try b.load_from_string(buf.items, &state.eol_mode, &state.utf8_sanitized);
|
||||
state.bytes = buf.items.len;
|
||||
state.chunks = 1;
|
||||
primary.cursor = state.old_primary.cursor;
|
||||
|
@ -4145,7 +4156,7 @@ pub const Editor = struct {
|
|||
if (state.old_primary_reversed) sel.reverse();
|
||||
primary.cursor = sel.end;
|
||||
}
|
||||
try self.update_buf_and_eol_mode(state.work_root, state.eol_mode);
|
||||
try self.update_buf_and_eol_mode(state.work_root, state.eol_mode, state.utf8_sanitized);
|
||||
primary.cursor.clamp_to_buffer(state.work_root, self.metrics);
|
||||
self.logger.print("filter: done (bytes:{d} chunks:{d})", .{ state.bytes, state.chunks });
|
||||
self.reset_syntax();
|
||||
|
|
|
@ -32,6 +32,7 @@ file_dirty: bool = false,
|
|||
detailed: bool = false,
|
||||
file: bool = false,
|
||||
eol_mode: Buffer.EolMode = .lf,
|
||||
utf8_sanitized: bool = false,
|
||||
|
||||
const project_icon = "";
|
||||
const Self = @This();
|
||||
|
@ -161,6 +162,11 @@ fn render_detailed(self: *Self, plane: *Plane, theme: *const Widget.Theme) void
|
|||
_ = plane.print(" of {d} lines", .{self.lines}) catch {};
|
||||
if (self.file_type.len > 0)
|
||||
_ = plane.print(" ({s}){s}", .{ self.file_type, eol_mode }) catch {};
|
||||
|
||||
if (self.utf8_sanitized) {
|
||||
plane.set_style(.{ .fg = theme.editor_error.fg.? });
|
||||
_ = plane.putstr(" [UTF-8 sanitized]") catch {};
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -196,7 +202,7 @@ pub fn receive(self: *Self, _: *Button.State(Self), _: tp.pid_ref, m: tp.message
|
|||
return false;
|
||||
if (try m.match(.{ "E", "dirty", tp.extract(&file_dirty) })) {
|
||||
self.file_dirty = file_dirty;
|
||||
} else if (try m.match(.{ "E", "eol_mode", tp.extract(&eol_mode) })) {
|
||||
} else if (try m.match(.{ "E", "eol_mode", tp.extract(&eol_mode), tp.extract(&self.utf8_sanitized) })) {
|
||||
self.eol_mode = @enumFromInt(eol_mode);
|
||||
} else if (try m.match(.{ "E", "save", tp.extract(&file_path) })) {
|
||||
@memcpy(self.name_buf[0..file_path.len], file_path);
|
||||
|
|
|
@ -10,12 +10,15 @@ const EventHandler = @import("EventHandler");
|
|||
const Widget = @import("../Widget.zig");
|
||||
const Button = @import("../Button.zig");
|
||||
|
||||
const utf8_sanitized_warning = " UTF";
|
||||
|
||||
line: usize = 0,
|
||||
lines: usize = 0,
|
||||
column: usize = 0,
|
||||
buf: [256]u8 = undefined,
|
||||
rendered: [:0]const u8 = "",
|
||||
eol_mode: Buffer.EolMode = .lf,
|
||||
utf8_sanitized: bool = false,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
|
@ -36,7 +39,8 @@ fn on_click(_: *Self, _: *Button.State(Self)) void {
|
|||
}
|
||||
|
||||
pub fn layout(self: *Self, btn: *Button.State(Self)) Widget.Layout {
|
||||
const len = btn.plane.egc_chunk_width(self.rendered, 0, 1);
|
||||
const warn_len = if (self.utf8_sanitized) btn.plane.egc_chunk_width(utf8_sanitized_warning, 0, 1) else 0;
|
||||
const len = btn.plane.egc_chunk_width(self.rendered, 0, 1) + warn_len;
|
||||
return .{ .static = len };
|
||||
}
|
||||
|
||||
|
@ -47,6 +51,11 @@ pub fn render(self: *Self, btn: *Button.State(Self), theme: *const Widget.Theme)
|
|||
btn.plane.set_style(if (btn.active) theme.editor_cursor else if (btn.hover) theme.statusbar_hover else theme.statusbar);
|
||||
btn.plane.fill(" ");
|
||||
btn.plane.home();
|
||||
if (self.utf8_sanitized) {
|
||||
btn.plane.set_style(.{ .fg = theme.editor_error.fg.? });
|
||||
_ = btn.plane.putstr(utf8_sanitized_warning) catch {};
|
||||
}
|
||||
btn.plane.set_style(if (btn.active) theme.editor_cursor else if (btn.hover) theme.statusbar_hover else theme.statusbar);
|
||||
_ = btn.plane.putstr(self.rendered) catch {};
|
||||
return false;
|
||||
}
|
||||
|
@ -67,7 +76,7 @@ pub fn receive(self: *Self, _: *Button.State(Self), _: tp.pid_ref, m: tp.message
|
|||
var eol_mode: Buffer.EolModeTag = @intFromEnum(Buffer.EolMode.lf);
|
||||
if (try m.match(.{ "E", "pos", tp.extract(&self.lines), tp.extract(&self.line), tp.extract(&self.column) })) {
|
||||
self.format();
|
||||
} else if (try m.match(.{ "E", "eol_mode", tp.extract(&eol_mode) })) {
|
||||
} else if (try m.match(.{ "E", "eol_mode", tp.extract(&eol_mode), tp.extract(&self.utf8_sanitized) })) {
|
||||
self.eol_mode = @enumFromInt(eol_mode);
|
||||
self.format();
|
||||
} else if (try m.match(.{ "E", "open", tp.more })) {
|
||||
|
@ -78,6 +87,7 @@ pub fn receive(self: *Self, _: *Button.State(Self), _: tp.pid_ref, m: tp.message
|
|||
self.column = 0;
|
||||
self.rendered = "";
|
||||
self.eol_mode = .lf;
|
||||
self.utf8_sanitized = false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue