From bc0745bc4b9735e416ce1ba930af9ee99b4856e6 Mon Sep 17 00:00:00 2001
From: CJ van den Berg
Date: Wed, 19 Mar 2025 15:24:46 +0100
Subject: [PATCH] Squashed 'src/syntax/' changes from d5c1cd2a..80ceadcf

80ceadcf fix IB in cache
2d29a6ba feat: allow parallel loading of tree-sitter query cache entries
ea00711e feat: add caching of tree-sitter query objects
149a7fc5 fix: do not parse tree-sitter queries twice
788000af Merge commit '77a69410b8bdd993cb092c93353ecd51c0227353' into zig-0.14
0efed0ee Merge branch 'master' into zig-0.14
31f174d6 Merge branch 'master' into zig-0.14
a9d8d26f Merge branch 'master' into zig-0.14

git-subtree-dir: src/syntax
git-subtree-split: 80ceadcff90b431378c23ea6c01b9b7c8b7c7ca5
---
 src/QueryCache.zig | 193 +++++++++++++++++++++++++++++++++++++++++++++
 src/syntax.zig     |  35 +++++---
 2 files changed, 217 insertions(+), 11 deletions(-)
 create mode 100644 src/QueryCache.zig

diff --git a/src/QueryCache.zig b/src/QueryCache.zig
new file mode 100644
index 0000000..8cfdb91
--- /dev/null
+++ b/src/QueryCache.zig
@@ -0,0 +1,193 @@
+//! Reference-counted cache of compiled tree-sitter queries, keyed by file
+//! type name, so a query is compiled once and then reused.
+
+const std = @import("std");
+const build_options = @import("build_options");
+
+const treez = if (build_options.use_tree_sitter)
+    @import("treez")
+else
+    @import("treez_dummy.zig");
+
+const Self = @This();
+
+pub const FileType = @import("file_type.zig");
+const Query = treez.Query;
+
+allocator: std.mem.Allocator,
+/// Guards the two maps and `ref_count`; null when created without locking.
+mutex: ?std.Thread.Mutex,
+highlights: std.StringHashMapUnmanaged(*CacheEntry) = .{},
+injections: std.StringHashMapUnmanaged(*CacheEntry) = .{},
+/// Starts at 1 for the creator; every non-null query returned by `get` adds one.
+ref_count: usize = 1,
+
+const CacheEntry = struct {
+    /// Serializes the lazy creation of `query` for this entry only.
+    mutex: ?std.Thread.Mutex,
+    query: ?*Query,
+    file_type: *const FileType,
+    query_type: QueryType,
+};
+
+pub const QueryType = enum {
+    highlights,
+    injections,
+};
+
+const QueryParseError = error{
+    InvalidSyntax,
+    InvalidNodeType,
+    InvalidField,
+    InvalidCapture,
+    InvalidStructure,
+    InvalidLanguage,
+};
+
+const CacheError = error{
+    NotFound,
+    OutOfMemory,
+};
+
+pub const Error = CacheError || QueryParseError;
+
+/// Allocates a cache holding one reference owned by the caller; drop it with
+/// `deinit`. `.lock = true` enables the internal mutexes.
+pub fn create(allocator: std.mem.Allocator, opts: struct { lock: bool = false }) !*Self {
+    const self = try allocator.create(Self);
+    self.* = .{
+        .allocator = allocator,
+        .mutex = if (opts.lock) .{} else null,
+    };
+    return self;
+}
+
+/// Drops the creator's reference. The cache is freed once every query handed
+/// out by `get` has also been passed to `release`.
+pub fn deinit(self: *Self) void {
+    self.release_ref_unlocked_and_maybe_destroy();
+}
+
+/// Adds one reference. The caller must hold `mutex` when locking is enabled.
+fn add_ref_locked(self: *Self) void {
+    std.debug.assert(self.ref_count > 0);
+    self.ref_count += 1;
+}
+
+/// Drops one reference, taking `mutex` itself, and tears the cache down when
+/// the last reference is gone.
+fn release_ref_unlocked_and_maybe_destroy(self: *Self) void {
+    {
+        if (self.mutex) |*mtx| mtx.lock();
+        defer if (self.mutex) |*mtx| mtx.unlock();
+        self.ref_count -= 1;
+        if (self.ref_count > 0) return;
+    }
+
+    self.destroy_entries(&self.highlights);
+    self.destroy_entries(&self.injections);
+    self.allocator.destroy(self);
+}
+
+/// Frees every key, compiled query and entry in `map`, then the map itself.
+fn destroy_entries(self: *Self, map: *std.StringHashMapUnmanaged(*CacheEntry)) void {
+    var iter = map.iterator();
+    while (iter.next()) |p| {
+        self.allocator.free(p.key_ptr.*);
+        if (p.value_ptr.*.query) |q| q.destroy();
+        // Entries are heap-allocated in `get_cache_entry`; free them as well.
+        self.allocator.destroy(p.value_ptr.*);
+    }
+    map.deinit(self.allocator);
+}
+
+/// Returns the entry for `file_type`, inserting an empty one (no query yet)
+/// on first use. The map owns a private copy of the name used as key.
+fn get_cache_entry(self: *Self, file_type: *const FileType, comptime query_type: QueryType) CacheError!*CacheEntry {
+    if (self.mutex) |*mtx| mtx.lock();
+    defer if (self.mutex) |*mtx| mtx.unlock();
+
+    const hash = switch (query_type) {
+        .highlights => &self.highlights,
+        .injections => &self.injections,
+    };
+
+    if (hash.get(file_type.name)) |entry| return entry;
+
+    // Build the entry and key completely before touching the map, so that an
+    // allocation failure can neither leak them nor leave a half-filled slot.
+    const entry = try self.allocator.create(CacheEntry);
+    errdefer self.allocator.destroy(entry);
+    entry.* = .{
+        .query = null,
+        .mutex = if (self.mutex) |_| .{} else null,
+        .file_type = file_type,
+        .query_type = query_type,
+    };
+    const key = try self.allocator.dupe(u8, file_type.name);
+    errdefer self.allocator.free(key);
+    try hash.put(self.allocator, key, entry);
+    return entry;
+}
+
+/// Returns the compiled query for `entry`, creating it on first use. Returns
+/// null only for an injections entry whose file type has no injections source.
+fn get_cached_query(_: *Self, entry: *CacheEntry) QueryParseError!?*Query {
+    if (entry.mutex) |*mtx| mtx.lock();
+    defer if (entry.mutex) |*mtx| mtx.unlock();
+
+    return if (entry.query) |query| query else blk: {
+        const lang = entry.file_type.lang_fn() orelse std.debug.panic("tree-sitter parser function failed for language: {s}", .{entry.file_type.name});
+        entry.query = try Query.create(lang, switch (entry.query_type) {
+            .highlights => entry.file_type.highlights,
+            .injections => if (entry.file_type.injections) |injections| injections else return null,
+        });
+        break :blk entry.query.?;
+    };
+}
+
+/// Creates the cached query of one kind for `file_type` if it does not exist yet.
+fn pre_load_internal(self: *Self, file_type: *const FileType, comptime query_type: QueryType) Error!void {
+    _ = try self.get_cached_query(try self.get_cache_entry(file_type, query_type));
+}
+
+/// Warms the cache for `lang_name` without taking references. Unknown
+/// language names are ignored.
+pub fn pre_load(self: *Self, lang_name: []const u8) Error!void {
+    const file_type = FileType.get_by_name(lang_name) orelse return;
+    try self.pre_load_internal(file_type, .highlights);
+    try self.pre_load_internal(file_type, .injections);
+}
+
+/// Result type of `get`: the injections query is optional, highlights is not.
+fn ReturnType(comptime query_type: QueryType) type {
+    return switch (query_type) {
+        .highlights => *Query,
+        .injections => ?*Query,
+    };
+}
+
+/// Returns the cached query for `file_type`, compiling it on first use. Every
+/// non-null result holds one reference on the cache that must be given back
+/// with `release`; a null injections result holds none.
+pub fn get(self: *Self, file_type: *const FileType, comptime query_type: QueryType) Error!ReturnType(query_type) {
+    const query = try self.get_cached_query(try self.get_cache_entry(file_type, query_type));
+    // `release` only accepts a non-null query, so a null result must not take
+    // a reference or the count could never return to zero.
+    if (query != null) {
+        if (self.mutex) |*mtx| mtx.lock();
+        defer if (self.mutex) |*mtx| mtx.unlock();
+        self.add_ref_locked();
+    }
+    return switch (query_type) {
+        .highlights => query.?,
+        .injections => query,
+    };
+}
+
+/// Gives back one reference obtained from `get`. The query itself stays cached.
+pub fn release(self: *Self, query: *Query, comptime query_type: QueryType) void {
+    _ = query;
+    _ = query_type;
+    self.release_ref_unlocked_and_maybe_destroy();
+}
diff --git a/src/syntax.zig b/src/syntax.zig
index 0f8ff97..20f9b8e 100644
--- a/src/syntax.zig
+++ b/src/syntax.zig
@@ -10,6 +10,7 @@ const Self = @This();
 
 pub const Edit = treez.InputEdit;
 pub const FileType = @import("file_type.zig");
+pub const QueryCache = @import("QueryCache.zig");
 pub const Range = treez.Range;
 pub const Point = treez.Point;
 const Input = treez.Input;
@@ -23,37 +24,49 @@ lang: *const Language,
 file_type: *const FileType,
 parser: *Parser,
 query: *Query,
-injections: *Query,
+injections: ?*Query,
 tree: ?*treez.Tree = null,
 
-pub fn create(file_type: *const FileType, allocator: std.mem.Allocator) !*Self {
+/// Creates a syntax object for `file_type`, borrowing its queries from
+/// `query_cache`. Destroy it with `destroy`, passing the same cache.
+pub fn create(file_type: *const FileType, allocator: std.mem.Allocator, query_cache: *QueryCache) !*Self {
+    const query = try query_cache.get(file_type, .highlights);
+    errdefer query_cache.release(query, .highlights);
+    const injections = try query_cache.get(file_type, .injections);
+    errdefer if (injections) |q| query_cache.release(q, .injections);
     const self = try allocator.create(Self);
+    errdefer allocator.destroy(self);
     self.* = .{
         .allocator = allocator,
         .lang = file_type.lang_fn() orelse std.debug.panic("tree-sitter parser function failed for language: {s}", .{file_type.name}),
         .file_type = file_type,
         .parser = try Parser.create(),
-        .query = try Query.create(self.lang, file_type.highlights),
-        .injections = try Query.create(self.lang, file_type.highlights),
+        .query = query,
+        .injections = injections,
     };
-    errdefer self.destroy();
+    // Everything else is unwound by the errdefers above; only the parser is left.
+    errdefer self.parser.destroy();
     try self.parser.setLanguage(self.lang);
     return self;
 }
 
-pub fn create_file_type(allocator: std.mem.Allocator, lang_name: []const u8) !*Self {
+/// Like `create`, looking the file type up by language name.
+pub fn create_file_type(allocator: std.mem.Allocator, lang_name: []const u8, query_cache: *QueryCache) !*Self {
     const file_type = FileType.get_by_name(lang_name) orelse return error.NotFound;
-    return create(file_type, allocator);
+    return create(file_type, allocator, query_cache);
 }
 
-pub fn create_guess_file_type(allocator: std.mem.Allocator, content: []const u8, file_path: ?[]const u8) !*Self {
+/// Like `create`, guessing the file type from the path and content.
+pub fn create_guess_file_type(allocator: std.mem.Allocator, content: []const u8, file_path: ?[]const u8, query_cache: *QueryCache) !*Self {
     const file_type = FileType.guess(file_path, content) orelse return error.NotFound;
-    return create(file_type, allocator);
+    return create(file_type, allocator, query_cache);
 }
 
-pub fn destroy(self: *Self) void {
+/// Frees this object and returns its query references to `query_cache`.
+pub fn destroy(self: *Self, query_cache: *QueryCache) void {
     if (self.tree) |tree| tree.destroy();
-    self.query.destroy();
+    query_cache.release(self.query, .highlights);
+    if (self.injections) |injections| query_cache.release(injections, .injections);
     self.parser.destroy();
     self.allocator.destroy(self);
 }