From 5db2ec10b6d59e5ffee2c3e3532e10ce3ecf4912 Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Thu, 24 Apr 2025 18:07:22 +0200 Subject: [PATCH 1/3] feat: add support for pre-generated tree-sitter error queries --- src/syntax/src/QueryCache.zig | 33 +++++++++++++++++------------ src/syntax/src/file_type.zig | 7 +++++- src/syntax/src/ts_bin_query_gen.zig | 24 +++++++++++++++++---- 3 files changed, 45 insertions(+), 19 deletions(-) diff --git a/src/syntax/src/QueryCache.zig b/src/syntax/src/QueryCache.zig index 8869db1..a9dad05 100644 --- a/src/syntax/src/QueryCache.zig +++ b/src/syntax/src/QueryCache.zig @@ -16,6 +16,7 @@ allocator: std.mem.Allocator, mutex: ?std.Thread.Mutex, highlights: std.StringHashMapUnmanaged(*CacheEntry) = .{}, injections: std.StringHashMapUnmanaged(*CacheEntry) = .{}, +errors: std.StringHashMapUnmanaged(*CacheEntry) = .{}, ref_count: usize = 1, const CacheEntry = struct { @@ -38,6 +39,7 @@ const CacheEntry = struct { pub const QueryType = enum { highlights, + errors, injections, }; @@ -83,29 +85,29 @@ fn release_ref_unlocked_and_maybe_destroy(self: *Self) void { if (self.ref_count > 0) return; } - var iter_highlights = self.highlights.iterator(); - while (iter_highlights.next()) |p| { - self.allocator.free(p.key_ptr.*); - p.value_ptr.*.destroy(self.allocator); - self.allocator.destroy(p.value_ptr.*); - } - var iter_injections = self.injections.iterator(); - while (iter_injections.next()) |p| { - self.allocator.free(p.key_ptr.*); - p.value_ptr.*.destroy(self.allocator); - self.allocator.destroy(p.value_ptr.*); - } - self.highlights.deinit(self.allocator); - self.injections.deinit(self.allocator); + release_cache_entry_hash_map(self.allocator, &self.highlights); + release_cache_entry_hash_map(self.allocator, &self.errors); + release_cache_entry_hash_map(self.allocator, &self.injections); self.allocator.destroy(self); } +fn release_cache_entry_hash_map(allocator: std.mem.Allocator, hash_map: *std.StringHashMapUnmanaged(*CacheEntry)) void { + 
var iter = hash_map.iterator(); + while (iter.next()) |p| { + allocator.free(p.key_ptr.*); + p.value_ptr.*.destroy(allocator); + allocator.destroy(p.value_ptr.*); + } + hash_map.deinit(allocator); +} + fn get_cache_entry(self: *Self, file_type: *const FileType, comptime query_type: QueryType) CacheError!*CacheEntry { if (self.mutex) |*mtx| mtx.lock(); defer if (self.mutex) |*mtx| mtx.unlock(); const hash = switch (query_type) { .highlights => &self.highlights, + .errors => &self.errors, .injections => &self.injections, }; @@ -135,6 +137,7 @@ fn get_cached_query(self: *Self, entry: *CacheEntry) Error!?*Query { const queries = FileType.queries.get(entry.file_type.name) orelse return null; const query_bin = switch (entry.query_type) { .highlights => queries.highlights_bin, + .errors => queries.errors_bin, .injections => queries.injections_bin orelse return null, }; const query, const arena = try deserialize_query(query_bin, lang, self.allocator); @@ -151,12 +154,14 @@ fn pre_load_internal(self: *Self, file_type: *const FileType, comptime query_typ pub fn pre_load(self: *Self, lang_name: []const u8) Error!void { const file_type = FileType.get_by_name(lang_name) orelse return; _ = try self.pre_load_internal(file_type, .highlights); + _ = try self.pre_load_internal(file_type, .errors); _ = try self.pre_load_internal(file_type, .injections); } fn ReturnType(comptime query_type: QueryType) type { return switch (query_type) { .highlights => *Query, + .errors => *Query, .injections => ?*Query, }; } diff --git a/src/syntax/src/file_type.zig b/src/syntax/src/file_type.zig index f8aa5f3..4c45c64 100644 --- a/src/syntax/src/file_type.zig +++ b/src/syntax/src/file_type.zig @@ -138,6 +138,7 @@ fn load_file_types(comptime Namespace: type) []const FileType { pub const FileTypeQueries = struct { highlights_bin: []const u8, + errors_bin: []const u8, injections_bin: ?[]const u8, }; @@ -145,7 +146,7 @@ pub const queries = std.static_string_map.StaticStringMap(FileTypeQueries).initC fn 
load_queries() []const struct { []const u8, FileTypeQueries } { if (!build_options.use_tree_sitter) return &.{}; - @setEvalBranchQuota(16000); + @setEvalBranchQuota(32000); const queries_cb = @embedFile("syntax_bin_queries"); var iter: []const u8 = queries_cb; var len = cbor.decodeMapHeader(&iter) catch |e| { @@ -163,6 +164,10 @@ fn load_queries() []const struct { []const u8, FileTypeQueries } { var iter_: []const u8 = iter; break :blk get_query_value_bin(&iter_, "highlights") orelse @compileError("missing highlights for " ++ lang); }, + .errors_bin = blk: { + var iter_: []const u8 = iter; + break :blk get_query_value_bin(&iter_, "errors") orelse @compileError("missing errors query for " ++ lang); + }, .injections_bin = blk: { var iter_: []const u8 = iter; break :blk get_query_value_bin(&iter_, "injections"); diff --git a/src/syntax/src/ts_bin_query_gen.zig b/src/syntax/src/ts_bin_query_gen.zig index 14c86e7..e32c6ef 100644 --- a/src/syntax/src/ts_bin_query_gen.zig +++ b/src/syntax/src/ts_bin_query_gen.zig @@ -4,6 +4,8 @@ const treez = @import("treez"); pub const tss = @import("ts_serializer.zig"); +const verbose = false; + pub fn main() anyerror!void { const allocator = std.heap.c_allocator; const args = try std.process.argsAlloc(allocator); @@ -33,7 +35,7 @@ pub fn main() anyerror!void { const lang = file_type.lang_fn() orelse std.debug.panic("tree-sitter parser function failed for language: {s}", .{file_type.name}); try cbor.writeValue(writer, file_type.name); - try cbor.writeMapHeader(writer, if (file_type.injections) |_| 2 else 1); + try cbor.writeMapHeader(writer, if (file_type.injections) |_| 3 else 2); const highlights_in = try treez.Query.create(lang, file_type.highlights); const ts_highlights_in: *tss.TSQuery = @alignCast(@ptrCast(highlights_in)); @@ -43,7 +45,19 @@ pub fn main() anyerror!void { try cbor.writeValue(writer, "highlights"); try cbor.writeValue(writer, highlights_cb); - // std.log.info("file_type {s} highlights {d} bytes", .{ file_type.name, 
highlights_cb.len }); + if (verbose) + std.log.info("file_type {s} highlights {d} bytes", .{ file_type.name, highlights_cb.len }); + + const errors_in = try treez.Query.create(lang, "(ERROR) @error"); + const ts_errors_in: *tss.TSQuery = @alignCast(@ptrCast(errors_in)); + + const errors_cb = try tss.toCbor(ts_errors_in, allocator); + defer allocator.free(errors_cb); + + try cbor.writeValue(writer, "errors"); + try cbor.writeValue(writer, errors_cb); + if (verbose) + std.log.info("file_type {s} errors {d} bytes", .{ file_type.name, errors_cb.len }); if (file_type.injections) |injections| { const injections_in = try treez.Query.create(lang, injections); @@ -54,12 +68,14 @@ pub fn main() anyerror!void { try cbor.writeValue(writer, "injections"); try cbor.writeValue(writer, injections_cb); - // std.log.info("file_type {s} injections {d} bytes", .{ file_type.name, injections_cb.len }); + if (verbose) + std.log.info("file_type {s} injections {d} bytes", .{ file_type.name, injections_cb.len }); } } try output_file.writeAll(output.items); - // std.log.info("file_types total {d} bytes", .{output.items.len}); + if (verbose) + std.log.info("file_types total {d} bytes", .{output.items.len}); } fn fatal(comptime format: []const u8, args: anytype) noreturn { From 4b84e35b9ffce16138fca4a4268d4e591dc6963d Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Thu, 24 Apr 2025 18:08:31 +0200 Subject: [PATCH 2/3] feat: add syntax.count_error_nodes function --- src/syntax/src/syntax.zig | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/syntax/src/syntax.zig b/src/syntax/src/syntax.zig index 20f9b8e..2e5096e 100644 --- a/src/syntax/src/syntax.zig +++ b/src/syntax/src/syntax.zig @@ -24,11 +24,13 @@ lang: *const Language, file_type: *const FileType, parser: *Parser, query: *Query, +errors_query: *Query, injections: ?*Query, tree: ?*treez.Tree = null, pub fn create(file_type: *const FileType, allocator: std.mem.Allocator, query_cache: *QueryCache) !*Self { const query 
= try query_cache.get(file_type, .highlights); + const errors_query = try query_cache.get(file_type, .errors); const injections = try query_cache.get(file_type, .injections); const self = try allocator.create(Self); self.* = .{ @@ -37,6 +39,7 @@ pub fn create(file_type: *const FileType, allocator: std.mem.Allocator, query_ca .file_type = file_type, .parser = try Parser.create(), .query = query, + .errors_query = errors_query, .injections = injections, }; errdefer self.destroy(query_cache); @@ -194,3 +197,15 @@ pub fn node_at_point_range(self: *const Self, range: Range) error{Stop}!treez.No const root_node = tree.getRootNode(); return treez.Node.externs.ts_node_descendant_for_point_range(root_node, range.start_point, range.end_point); } + +pub fn count_error_nodes(self: *const Self) usize { + const cursor = Query.Cursor.create() catch return std.math.maxInt(usize); + defer cursor.destroy(); + const tree = self.tree orelse return 0; + cursor.execute(self.errors_query, tree.getRootNode()); + var error_count: usize = 0; + while (cursor.nextMatch()) |match| for (match.captures()) |_| { + error_count += 1; + }; + return error_count; +} From 05a14ae95c6dc1cfdeb10ea303aba6d9c01c475c Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Thu, 24 Apr 2025 21:09:51 +0200 Subject: [PATCH 3/3] feat: do a full reparse if tree-sitter reports syntax errors This change will scan for tree-sitter errors after a partial reparse and do a full reparse if the number of errors reaches the error threshold (currently 3). This helps prevent the tree-sitter syntax tree getting out of sync during multi-cursor edits. 
--- src/tui/editor.zig | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/tui/editor.zig b/src/tui/editor.zig index f1ff62b..48b13b1 100644 --- a/src/tui/editor.zig +++ b/src/tui/editor.zig @@ -35,6 +35,7 @@ const scroll_cursor_min_border_distance = 5; const double_click_time_ms = 350; const syntax_full_reparse_time_limit = 0; // ms (0 = always use incremental) +const syntax_full_reparse_error_threshold = 3; // number of tree-sitter errors that trigger a full reparse pub const max_matches = if (builtin.mode == std.builtin.OptimizeMode.Debug) 10_000 else 100_000; pub const max_match_lines = 15; @@ -888,7 +889,7 @@ pub const Editor = struct { self.style_cache_theme = theme.name; const cache: *StyleCache = &self.style_cache.?; self.render_screen(theme, cache); - return self.scroll_dest != self.view.row; + return self.scroll_dest != self.view.row or self.syntax_refresh_full; } const CellType = enum { @@ -4480,7 +4481,7 @@ pub const Editor = struct { fn update_syntax(self: *Self) !void { const root = try self.buf_root(); const eol_mode = try self.buf_eol_mode(); - if (self.syntax_last_rendered_root == root) + if (!self.syntax_refresh_full and self.syntax_last_rendered_root == root) return; var kind: enum { full, incremental, none } = .none; var edit_count: usize = 0; @@ -4491,6 +4492,7 @@ pub const Editor = struct { defer frame.deinit(); syn.reset(); self.syntax_last_rendered_root = null; + self.syntax_refresh_full = false; return; } if (!self.syntax_incremental_reparse) @@ -4544,6 +4546,11 @@ pub const Editor = struct { const frame = tracy.initZone(@src(), .{ .name = "editor refresh syntax" }); defer frame.deinit(); try syn.refresh_from_string(content); + const error_count = syn.count_error_nodes(); + if (error_count >= syntax_full_reparse_error_threshold) { + self.logger.print("incremental syntax update has {d} errors -> full reparse", .{error_count}); + self.syntax_refresh_full = true; + } } self.syntax_last_rendered_root = root; kind = 
.incremental;