From 42ee6459e6922dd616ccdf782394957558a7f428 Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Mon, 29 Jul 2024 21:46:34 +0200 Subject: [PATCH] feat: improve performance of incremental re-parsing We use Buffer.Node.get_byte_pos instead of Buffer.Node.get_range to calculate tree-sitter edits for incremental parsing. This reduces the number of egc_length calls required per syntax.edit call to the minimum. --- build.zig.zon | 4 ++-- src/buffer/Buffer.zig | 54 +++++++++++++++++++++++++++++++++++++++++-- src/tui/editor.zig | 40 ++++++++++++++++++++------------ test/tests_buffer.zig | 27 ++++++++++++++++++++++ 4 files changed, 106 insertions(+), 19 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index 3c0a6d7..d15baeb 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -24,8 +24,8 @@ .hash = "1220906d82deb37573207c5d365edb6b58a0b1b3e4ef68902019a23a0eb5e5a34298", }, .syntax = .{ - .url = "https://github.com/neurocyte/flow-syntax/archive/26b99478af9a1f09e0af8fd77fbdbe44e2015da4.tar.gz", - .hash = "1220f993fa053a2a76355906e74e1559f770fd612b303c8b4a1bafa116ab157b5efa", + .url = "https://github.com/neurocyte/flow-syntax/archive/3619572ed88841cd2106c141c0baacfcb8496023.tar.gz", + .hash = "12206cb26ee5b770ed71ffc230be94bb229de4ed23ca35497ea94770d9bb793f77c6", }, .fuzzig = .{ .url = "https://github.com/fjebaker/fuzzig/archive/0fd156d5097365151e85a85eef9d8cf0eebe7b00.tar.gz", diff --git a/src/buffer/Buffer.zig b/src/buffer/Buffer.zig index 9a2bc15..8f548ef 100644 --- a/src/buffer/Buffer.zig +++ b/src/buffer/Buffer.zig @@ -585,9 +585,10 @@ const Node = union(enum) { var ctx: Ctx = .{ .sel = sel, .out = copy_buf }; ctx.sel.normalize(); if (ctx.sel.begin.eql(ctx.sel.end)) - return error.Stop; + return if (copy_buf) |_| "" else null; self.walk_egc_forward(ctx.sel.begin.row, Ctx.walker, &ctx, metrics_) catch |e| return switch (e) { error.NoSpaceLeft => error.NoSpaceLeft, + error.Stop => if (copy_buf) |buf_| buf_[0..ctx.bytes] else null, else => error.Stop, }; if (size) |p| p.* = ctx.bytes; @@ -599,7 +600,7 @@ const Node = union(enum) { var end: Cursor = .{}; end.move_buffer_end(self, metrics); const result = self.get_range(.{ .begin = start, .end = end }, result_buf, null, null, metrics) catch |e| switch (e) { - error.NoSpaceLeft => result_buf[0..], + error.NoSpaceLeft => result_buf, else => @panic("buffer overflow in get_from_start_pos"), }; return result orelse ""; @@ -935,6 +936,55 @@ const Node = union(enum) { return self.store(ctx.writer()); } + pub fn get_byte_pos(self: *const Node, pos_: Cursor, metrics_: Metrics) !usize { + const Ctx = struct { + line: usize = 0, + abs_col: usize = 0, + pos: Cursor, + byte_pos: usize = 0, + metrics: Metrics, + const Ctx = @This(); + const Writer = std.io.Writer(*Ctx, error{Stop}, write); + fn write(ctx: *Ctx, bytes: []const u8) error{Stop}!usize { + if (ctx.line >= ctx.pos.row) { + return ctx.get_col_bytes(bytes, bytes.len); + } else for (bytes, 1..) |char, i| { + ctx.byte_pos += 1; + if (char == '\n') { + ctx.line += 1; + if (ctx.line >= ctx.pos.row) + return ctx.get_col_bytes(bytes[i..], bytes.len); + } + } + return bytes.len; + } + fn get_col_bytes(ctx: *Ctx, bytes: []const u8, result: usize) error{Stop}!usize { + var buf: []const u8 = bytes; + while (buf.len > 0) { + if (ctx.abs_col >= ctx.pos.col) return error.Stop; + if (buf[0] == '\n') return error.Stop; + var cols: c_int = undefined; + const egc_bytes = ctx.metrics.egc_length(ctx.metrics.ctx, buf, &cols, ctx.abs_col); + ctx.abs_col += @intCast(cols); + ctx.byte_pos += egc_bytes; + buf = buf[egc_bytes..]; + } + return result; + } + fn writer(ctx: *Ctx) Writer { + return .{ .context = ctx }; + } + }; + var ctx: Ctx = .{ + .pos = pos_, + .metrics = metrics_, + }; + self.store(ctx.writer()) catch |e| switch (e) { + error.Stop => return ctx.byte_pos, + }; + return error.NotFound; + } + pub fn debug_render_chunks(self: *const Node, line: usize, output: *ArrayList(u8), metrics_: Metrics) !void { const ctx_ = struct { l: *ArrayList(u8), diff --git a/src/tui/editor.zig b/src/tui/editor.zig index afec546..0eeee25 100644 --- a/src/tui/editor.zig +++ b/src/tui/editor.zig @@ -1440,8 +1440,7 @@ pub const Editor = struct { match.nudge_insert(nudge); if (self.syntax) |syn| { const root = self.buf_root() catch return; - var start_byte: usize = 0; - _ = root.get_range(.{ .begin = .{}, .end = nudge.begin }, null, &start_byte, null, self.plane.metrics()) catch return; + const start_byte = root.get_byte_pos(nudge.begin, self.plane.metrics()) catch return; syn.edit(.{ .start_byte = @intCast(start_byte), .old_end_byte = @intCast(start_byte), @@ -1454,7 +1453,6 @@ pub const Editor = struct { } fn nudge_delete(self: *Self, nudge: Selection, exclude: *const CurSel, size: usize) void { - _ = size; for (self.cursels.items, 0..) |*cursel_, i| if (cursel_.*) |*cursel| if (cursel != exclude) if (!cursel.nudge_delete(nudge)) { @@ -1464,7 +1462,18 @@ pub const Editor = struct { if (!match.nudge_delete(nudge)) { self.matches.items[i] = null; }; - self.reset_syntax(); + if (self.syntax) |syn| { + const root = self.buf_root() catch return; + const start_byte = root.get_byte_pos(nudge.begin, self.plane.metrics()) catch return; + syn.edit(.{ + .start_byte = @intCast(start_byte), + .old_end_byte = @intCast(start_byte + size), + .new_end_byte = @intCast(start_byte), + .start_point = .{ .row = @intCast(nudge.begin.row), .column = @intCast(nudge.begin.col) }, + .old_end_point = .{ .row = @intCast(nudge.end.row), .column = @intCast(nudge.end.col) }, + .new_end_point = .{ .row = @intCast(nudge.begin.row), .column = @intCast(nudge.begin.col) }, + }); + } } fn delete_selection(self: *Self, root: Buffer.Root, cursel: *CurSel, a: Allocator) error{Stop}!Buffer.Root { @@ -2454,7 +2463,6 @@ pub const Editor = struct { } pub fn indent(self: *Self, _: Context) Result { - defer self.reset_syntax(); const b = try self.buf_for_update(); const root = try self.with_cursels_mut(b.root, indent_cursel, b.a); try self.update_buf(root); @@ -2495,7 +2503,6 @@ pub const Editor = struct { } pub fn unindent(self: *Self, _: Context) Result { - defer self.reset_syntax(); const b = try self.buf_for_update(); const root = try self.with_cursels_mut(b.root, unindent_cursel, b.a); try self.update_buf(root); @@ -2902,18 +2909,21 @@ pub const Editor = struct { const token = @intFromPtr(root); if (self.syntax_token == token) return; - var content = std.ArrayList(u8).init(self.a); - defer content.deinit(); - try root.store(content.writer()); if (self.syntax) |syn| { - try if (self.syntax_refresh_full) - syn.refresh_full(content.items) - else - syn.refresh(content.items); - // syn.refresh_from_buffer(root, self.plane.metrics()); // TODO: partial refresh from buffer when treez PR is merged and flow-syntax is updated - self.syntax_refresh_full = false; + if (self.syntax_refresh_full) { + var content = std.ArrayList(u8).init(self.a); + defer content.deinit(); + try root.store(content.writer()); + try syn.refresh_full(content.items); + self.syntax_refresh_full = false; + } else { + try syn.refresh_from_buffer(root, self.plane.metrics()); + } self.syntax_token = token; } else { + var content = std.ArrayList(u8).init(self.a); + defer content.deinit(); + try root.store(content.writer()); self.syntax = syntax.create_guess_file_type(self.a, content.items, self.file_path) catch |e| switch (e) { error.NotFound => null, else => return e, diff --git a/test/tests_buffer.zig b/test/tests_buffer.zig index 2ec3b53..e92d237 100644 --- a/test/tests_buffer.zig +++ b/test/tests_buffer.zig @@ -151,6 +151,33 @@ test "line_len" { try std.testing.expectEqual(try buffer.root.line_width(1, metrics()), 5); } +test "get_byte_pos" { + const doc: []const u8 = + \\All your + \\ropes + \\are belong to + \\us! + \\All your + \\ropes + \\are belong to + \\us! + \\All your + \\ropes + \\are belong to + \\us! + ; + const buffer = try Buffer.create(a); + defer buffer.deinit(); + buffer.update(try buffer.load_from_string(doc)); + + try std.testing.expectEqual(0, try buffer.root.get_byte_pos(.{ .row = 0, .col = 0 }, metrics())); + try std.testing.expectEqual(9, try buffer.root.get_byte_pos(.{ .row = 1, .col = 0 }, metrics())); + try std.testing.expectEqual(11, try buffer.root.get_byte_pos(.{ .row = 1, .col = 2 }, metrics())); + try std.testing.expectEqual(33, try buffer.root.get_byte_pos(.{ .row = 4, .col = 0 }, metrics())); + try std.testing.expectEqual(66, try buffer.root.get_byte_pos(.{ .row = 8, .col = 0 }, metrics())); + try std.testing.expectEqual(97, try buffer.root.get_byte_pos(.{ .row = 11, .col = 2 }, metrics())); +} + test "del_chars" { const doc: []const u8 = \\All your