feat: improve performance of incremental re-parsing

We use Buffer.Node.get_byte_pos instead of Buffer.Node.get_range to calculate
tree-sitter edits for incremental parsing. This reduces the number of
egc_length calls required per syntax.edit call to the minimum.
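
Background for the hunks below: tree-sitter's incremental API describes an edit as three byte offsets plus three row/column points, so the editor only needs the byte position where the edit starts, not a full range walk from the top of the buffer. A minimal sketch of the insert case, assembled from the editor hunks in this diff; the fields after .old_end_byte are assumed here by symmetry with the delete path, and size stands for the number of bytes inserted:

    const start_byte = root.get_byte_pos(nudge.begin, self.plane.metrics()) catch return;
    syn.edit(.{
        // The inserted text occupies [start_byte, start_byte + size).
        .start_byte = @intCast(start_byte),
        .old_end_byte = @intCast(start_byte), // insertion: the old range is empty
        .new_end_byte = @intCast(start_byte + size),
        .start_point = .{ .row = @intCast(nudge.begin.row), .column = @intCast(nudge.begin.col) },
        .old_end_point = .{ .row = @intCast(nudge.begin.row), .column = @intCast(nudge.begin.col) },
        .new_end_point = .{ .row = @intCast(nudge.end.row), .column = @intCast(nudge.end.col) },
    });

The old code obtained start_byte from get_range over .{ .begin = .{}, .end = nudge.begin }, which invoked egc_length for every grapheme cluster in that prefix; get_byte_pos only measures clusters on the target row.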
Author: CJ van den Berg
Date:   2024-07-29 21:46:34 +02:00
Commit: 42ee6459e6 (parent b0a99a0813)

4 changed files with 106 additions and 19 deletions


@@ -24,8 +24,8 @@
             .hash = "1220906d82deb37573207c5d365edb6b58a0b1b3e4ef68902019a23a0eb5e5a34298",
         },
         .syntax = .{
-            .url = "https://github.com/neurocyte/flow-syntax/archive/26b99478af9a1f09e0af8fd77fbdbe44e2015da4.tar.gz",
-            .hash = "1220f993fa053a2a76355906e74e1559f770fd612b303c8b4a1bafa116ab157b5efa",
+            .url = "https://github.com/neurocyte/flow-syntax/archive/3619572ed88841cd2106c141c0baacfcb8496023.tar.gz",
+            .hash = "12206cb26ee5b770ed71ffc230be94bb229de4ed23ca35497ea94770d9bb793f77c6",
         },
         .fuzzig = .{
             .url = "https://github.com/fjebaker/fuzzig/archive/0fd156d5097365151e85a85eef9d8cf0eebe7b00.tar.gz",


@@ -585,9 +585,10 @@ const Node = union(enum) {
         var ctx: Ctx = .{ .sel = sel, .out = copy_buf };
         ctx.sel.normalize();
         if (ctx.sel.begin.eql(ctx.sel.end))
-            return error.Stop;
+            return if (copy_buf) |_| "" else null;
         self.walk_egc_forward(ctx.sel.begin.row, Ctx.walker, &ctx, metrics_) catch |e| return switch (e) {
             error.NoSpaceLeft => error.NoSpaceLeft,
+            error.Stop => if (copy_buf) |buf_| buf_[0..ctx.bytes] else null,
             else => error.Stop,
         };
         if (size) |p| p.* = ctx.bytes;
@@ -599,7 +600,7 @@ const Node = union(enum) {
         var end: Cursor = .{};
         end.move_buffer_end(self, metrics);
         const result = self.get_range(.{ .begin = start, .end = end }, result_buf, null, null, metrics) catch |e| switch (e) {
-            error.NoSpaceLeft => result_buf[0..],
+            error.NoSpaceLeft => result_buf,
             else => @panic("buffer overflow in get_from_start_pos"),
         };
         return result orelse "";
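
These two get_range fixes make the function total over the edge cases a position probe hits: an empty selection now yields an empty slice (or null when no copy buffer is given) instead of error.Stop, and a walk stopped early by the walker returns the bytes collected so far rather than discarding them.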
@@ -935,6 +936,55 @@ const Node = union(enum) {
         return self.store(ctx.writer());
     }
 
+    pub fn get_byte_pos(self: *const Node, pos_: Cursor, metrics_: Metrics) !usize {
+        const Ctx = struct {
+            line: usize = 0,
+            abs_col: usize = 0,
+            pos: Cursor,
+            byte_pos: usize = 0,
+            metrics: Metrics,
+            const Ctx = @This();
+            const Writer = std.io.Writer(*Ctx, error{Stop}, write);
+            fn write(ctx: *Ctx, bytes: []const u8) error{Stop}!usize {
+                if (ctx.line >= ctx.pos.row) {
+                    return ctx.get_col_bytes(bytes, bytes.len);
+                } else for (bytes, 1..) |char, i| {
+                    ctx.byte_pos += 1;
+                    if (char == '\n') {
+                        ctx.line += 1;
+                        if (ctx.line >= ctx.pos.row)
+                            return ctx.get_col_bytes(bytes[i..], bytes.len);
+                    }
+                }
+                return bytes.len;
+            }
+            fn get_col_bytes(ctx: *Ctx, bytes: []const u8, result: usize) error{Stop}!usize {
+                var buf: []const u8 = bytes;
+                while (buf.len > 0) {
+                    if (ctx.abs_col >= ctx.pos.col) return error.Stop;
+                    if (buf[0] == '\n') return error.Stop;
+                    var cols: c_int = undefined;
+                    const egc_bytes = ctx.metrics.egc_length(ctx.metrics.ctx, buf, &cols, ctx.abs_col);
+                    ctx.abs_col += @intCast(cols);
+                    ctx.byte_pos += egc_bytes;
+                    buf = buf[egc_bytes..];
+                }
+                return result;
+            }
+            fn writer(ctx: *Ctx) Writer {
+                return .{ .context = ctx };
+            }
+        };
+        var ctx: Ctx = .{
+            .pos = pos_,
+            .metrics = metrics_,
+        };
+        self.store(ctx.writer()) catch |e| switch (e) {
+            error.Stop => return ctx.byte_pos,
+        };
+        return error.NotFound;
+    }
+
     pub fn debug_render_chunks(self: *const Node, line: usize, output: *ArrayList(u8), metrics_: Metrics) !void {
         const ctx_ = struct {
             l: *ArrayList(u8),
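
The writer-based scan above bails out with error.Stop the moment the target position is reached: rows before pos.row cost one byte comparison each, and egc_length runs only for the columns of the target row itself. A simplified, self-contained version of the same scan over a flat slice, assuming ASCII input so that one byte equals one column (the real code walks rope chunks and delegates widths to Metrics.egc_length):

    const std = @import("std");

    // Simplified sketch of the get_byte_pos scan for a flat ASCII document.
    fn bytePosAscii(doc: []const u8, row: usize, col: usize) ?usize {
        var line: usize = 0;
        var byte: usize = 0;
        // Rows before the target are skipped with a plain byte scan.
        while (byte < doc.len and line < row) : (byte += 1) {
            if (doc[byte] == '\n') line += 1;
        }
        if (line < row) return null; // document ends before the target row
        // ASCII: one byte per column; the real code measures grapheme clusters here.
        return if (byte + col <= doc.len) byte + col else null;
    }

    test "bytePosAscii" {
        const doc = "All your\nropes\n";
        try std.testing.expectEqual(@as(?usize, 0), bytePosAscii(doc, 0, 0));
        try std.testing.expectEqual(@as(?usize, 11), bytePosAscii(doc, 1, 2));
    }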


@@ -1440,8 +1440,7 @@ pub const Editor = struct {
             match.nudge_insert(nudge);
         if (self.syntax) |syn| {
             const root = self.buf_root() catch return;
-            var start_byte: usize = 0;
-            _ = root.get_range(.{ .begin = .{}, .end = nudge.begin }, null, &start_byte, null, self.plane.metrics()) catch return;
+            const start_byte = root.get_byte_pos(nudge.begin, self.plane.metrics()) catch return;
             syn.edit(.{
                 .start_byte = @intCast(start_byte),
                 .old_end_byte = @intCast(start_byte),
@@ -1454,7 +1453,6 @@
     }
 
     fn nudge_delete(self: *Self, nudge: Selection, exclude: *const CurSel, size: usize) void {
-        _ = size;
         for (self.cursels.items, 0..) |*cursel_, i| if (cursel_.*) |*cursel|
             if (cursel != exclude)
                 if (!cursel.nudge_delete(nudge)) {
@@ -1464,7 +1462,18 @@
             if (!match.nudge_delete(nudge)) {
                 self.matches.items[i] = null;
             };
-        self.reset_syntax();
+        if (self.syntax) |syn| {
+            const root = self.buf_root() catch return;
+            const start_byte = root.get_byte_pos(nudge.begin, self.plane.metrics()) catch return;
+            syn.edit(.{
+                .start_byte = @intCast(start_byte),
+                .old_end_byte = @intCast(start_byte + size),
+                .new_end_byte = @intCast(start_byte),
+                .start_point = .{ .row = @intCast(nudge.begin.row), .column = @intCast(nudge.begin.col) },
+                .old_end_point = .{ .row = @intCast(nudge.end.row), .column = @intCast(nudge.end.col) },
+                .new_end_point = .{ .row = @intCast(nudge.begin.row), .column = @intCast(nudge.begin.col) },
+            });
+        }
     }
 
     fn delete_selection(self: *Self, root: Buffer.Root, cursel: *CurSel, a: Allocator) error{Stop}!Buffer.Root {
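
The delete path now needs the previously discarded size parameter: the removed bytes span [start_byte, start_byte + size), so old_end_byte is start_byte + size while new_end_byte collapses back to start_byte. Before this change the whole syntax tree was thrown away via reset_syntax(), forcing a full re-parse after every deletion.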
@@ -2454,7 +2463,6 @@
     }
 
     pub fn indent(self: *Self, _: Context) Result {
-        defer self.reset_syntax();
         const b = try self.buf_for_update();
         const root = try self.with_cursels_mut(b.root, indent_cursel, b.a);
         try self.update_buf(root);
@@ -2495,7 +2503,6 @@
     }
 
     pub fn unindent(self: *Self, _: Context) Result {
-        defer self.reset_syntax();
         const b = try self.buf_for_update();
         const root = try self.with_cursels_mut(b.root, unindent_cursel, b.a);
         try self.update_buf(root);
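
With both nudge paths feeding tree-sitter incremental edits, indent and unindent no longer need their defer self.reset_syntax() escape hatch; the edits generated during the buffer update keep the tree in sync.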
@@ -2902,18 +2909,21 @@
         const token = @intFromPtr(root);
         if (self.syntax_token == token)
             return;
-        var content = std.ArrayList(u8).init(self.a);
-        defer content.deinit();
-        try root.store(content.writer());
         if (self.syntax) |syn| {
-            try if (self.syntax_refresh_full)
-                syn.refresh_full(content.items)
-            else
-                syn.refresh(content.items);
-            // syn.refresh_from_buffer(root, self.plane.metrics()); // TODO: partial refresh from buffer when treez PR is merged and flow-syntax is updated
-            self.syntax_refresh_full = false;
+            if (self.syntax_refresh_full) {
+                var content = std.ArrayList(u8).init(self.a);
+                defer content.deinit();
+                try root.store(content.writer());
+                try syn.refresh_full(content.items);
+                self.syntax_refresh_full = false;
+            } else {
+                try syn.refresh_from_buffer(root, self.plane.metrics());
+            }
             self.syntax_token = token;
         } else {
+            var content = std.ArrayList(u8).init(self.a);
+            defer content.deinit();
+            try root.store(content.writer());
            self.syntax = syntax.create_guess_file_type(self.a, content.items, self.file_path) catch |e| switch (e) {
                 error.NotFound => null,
                 else => return e,
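
Serializing the whole document into an ArrayList, previously done on every refresh, is now confined to the two paths that genuinely need the full text: a forced full refresh (syntax_refresh_full) and the initial file-type guess. The common path re-parses straight from the buffer via refresh_from_buffer, which is what the flow-syntax dependency bump in build.zig.zon provides (the removed TODO comment was waiting for exactly this).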


@@ -151,6 +151,33 @@ test "line_len" {
     try std.testing.expectEqual(try buffer.root.line_width(1, metrics()), 5);
 }
 
+test "get_byte_pos" {
+    const doc: []const u8 =
+        \\All your
+        \\ropes
+        \\are belong to
+        \\us!
+        \\All your
+        \\ropes
+        \\are belong to
+        \\us!
+        \\All your
+        \\ropes
+        \\are belong to
+        \\us!
+    ;
+    const buffer = try Buffer.create(a);
+    defer buffer.deinit();
+    buffer.update(try buffer.load_from_string(doc));
+    try std.testing.expectEqual(0, try buffer.root.get_byte_pos(.{ .row = 0, .col = 0 }, metrics()));
+    try std.testing.expectEqual(9, try buffer.root.get_byte_pos(.{ .row = 1, .col = 0 }, metrics()));
+    try std.testing.expectEqual(11, try buffer.root.get_byte_pos(.{ .row = 1, .col = 2 }, metrics()));
+    try std.testing.expectEqual(33, try buffer.root.get_byte_pos(.{ .row = 4, .col = 0 }, metrics()));
+    try std.testing.expectEqual(66, try buffer.root.get_byte_pos(.{ .row = 8, .col = 0 }, metrics()));
+    try std.testing.expectEqual(97, try buffer.root.get_byte_pos(.{ .row = 11, .col = 2 }, metrics()));
+}
+
 test "del_chars" {
     const doc: []const u8 =
         \\All your
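
The expected offsets in the get_byte_pos test follow from the stanza length: "All your\n" (9 bytes) + "ropes\n" (6) + "are belong to\n" (14) + "us!\n" (4) totals 33 bytes, so row 4 starts at byte 33 and row 8 at byte 66, while row 1 column 2 is 9 + 2 = 11 and row 11 column 2 is 66 + 9 + 6 + 14 + 2 = 97. Every character here is ASCII, so each column is exactly one byte; multi-byte grapheme clusters are what egc_length handles on the non-test path.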