feat: add support for pre-generated tree-sitter error queries

This commit is contained in:
CJ van den Berg 2025-04-24 18:07:22 +02:00
parent c217db02f2
commit 5db2ec10b6
Signed by: neurocyte
GPG key ID: 8EB1E1BB660E3FB9
3 changed files with 45 additions and 19 deletions

View file

@ -16,6 +16,7 @@ allocator: std.mem.Allocator,
mutex: ?std.Thread.Mutex, mutex: ?std.Thread.Mutex,
highlights: std.StringHashMapUnmanaged(*CacheEntry) = .{}, highlights: std.StringHashMapUnmanaged(*CacheEntry) = .{},
injections: std.StringHashMapUnmanaged(*CacheEntry) = .{}, injections: std.StringHashMapUnmanaged(*CacheEntry) = .{},
errors: std.StringHashMapUnmanaged(*CacheEntry) = .{},
ref_count: usize = 1, ref_count: usize = 1,
const CacheEntry = struct { const CacheEntry = struct {
@ -38,6 +39,7 @@ const CacheEntry = struct {
pub const QueryType = enum { pub const QueryType = enum {
highlights, highlights,
errors,
injections, injections,
}; };
@ -83,29 +85,29 @@ fn release_ref_unlocked_and_maybe_destroy(self: *Self) void {
if (self.ref_count > 0) return; if (self.ref_count > 0) return;
} }
var iter_highlights = self.highlights.iterator(); release_cache_entry_hash_map(self.allocator, &self.highlights);
while (iter_highlights.next()) |p| { release_cache_entry_hash_map(self.allocator, &self.errors);
self.allocator.free(p.key_ptr.*); release_cache_entry_hash_map(self.allocator, &self.injections);
p.value_ptr.*.destroy(self.allocator);
self.allocator.destroy(p.value_ptr.*);
}
var iter_injections = self.injections.iterator();
while (iter_injections.next()) |p| {
self.allocator.free(p.key_ptr.*);
p.value_ptr.*.destroy(self.allocator);
self.allocator.destroy(p.value_ptr.*);
}
self.highlights.deinit(self.allocator);
self.injections.deinit(self.allocator);
self.allocator.destroy(self); self.allocator.destroy(self);
} }
/// Release every entry owned by `hash_map`: free each duplicated key string,
/// tear down each `CacheEntry` (which frees its query/arena resources), free
/// the entry object itself, and finally deinitialize the map's own storage.
fn release_cache_entry_hash_map(allocator: std.mem.Allocator, hash_map: *std.StringHashMapUnmanaged(*CacheEntry)) void {
    var entries = hash_map.iterator();
    while (entries.next()) |entry| {
        const cache_entry = entry.value_ptr.*;
        allocator.free(entry.key_ptr.*);
        cache_entry.destroy(allocator);
        allocator.destroy(cache_entry);
    }
    hash_map.deinit(allocator);
}
fn get_cache_entry(self: *Self, file_type: *const FileType, comptime query_type: QueryType) CacheError!*CacheEntry { fn get_cache_entry(self: *Self, file_type: *const FileType, comptime query_type: QueryType) CacheError!*CacheEntry {
if (self.mutex) |*mtx| mtx.lock(); if (self.mutex) |*mtx| mtx.lock();
defer if (self.mutex) |*mtx| mtx.unlock(); defer if (self.mutex) |*mtx| mtx.unlock();
const hash = switch (query_type) { const hash = switch (query_type) {
.highlights => &self.highlights, .highlights => &self.highlights,
.errors => &self.errors,
.injections => &self.injections, .injections => &self.injections,
}; };
@ -135,6 +137,7 @@ fn get_cached_query(self: *Self, entry: *CacheEntry) Error!?*Query {
const queries = FileType.queries.get(entry.file_type.name) orelse return null; const queries = FileType.queries.get(entry.file_type.name) orelse return null;
const query_bin = switch (entry.query_type) { const query_bin = switch (entry.query_type) {
.highlights => queries.highlights_bin, .highlights => queries.highlights_bin,
.errors => queries.errors_bin,
.injections => queries.injections_bin orelse return null, .injections => queries.injections_bin orelse return null,
}; };
const query, const arena = try deserialize_query(query_bin, lang, self.allocator); const query, const arena = try deserialize_query(query_bin, lang, self.allocator);
@ -151,12 +154,14 @@ fn pre_load_internal(self: *Self, file_type: *const FileType, comptime query_typ
pub fn pre_load(self: *Self, lang_name: []const u8) Error!void { pub fn pre_load(self: *Self, lang_name: []const u8) Error!void {
const file_type = FileType.get_by_name(lang_name) orelse return; const file_type = FileType.get_by_name(lang_name) orelse return;
_ = try self.pre_load_internal(file_type, .highlights); _ = try self.pre_load_internal(file_type, .highlights);
_ = try self.pre_load_internal(file_type, .errors);
_ = try self.pre_load_internal(file_type, .injections); _ = try self.pre_load_internal(file_type, .injections);
} }
fn ReturnType(comptime query_type: QueryType) type { fn ReturnType(comptime query_type: QueryType) type {
return switch (query_type) { return switch (query_type) {
.highlights => *Query, .highlights => *Query,
.errors => *Query,
.injections => ?*Query, .injections => ?*Query,
}; };
} }

View file

@ -138,6 +138,7 @@ fn load_file_types(comptime Namespace: type) []const FileType {
pub const FileTypeQueries = struct { pub const FileTypeQueries = struct {
highlights_bin: []const u8, highlights_bin: []const u8,
errors_bin: []const u8,
injections_bin: ?[]const u8, injections_bin: ?[]const u8,
}; };
@ -145,7 +146,7 @@ pub const queries = std.static_string_map.StaticStringMap(FileTypeQueries).initC
fn load_queries() []const struct { []const u8, FileTypeQueries } { fn load_queries() []const struct { []const u8, FileTypeQueries } {
if (!build_options.use_tree_sitter) return &.{}; if (!build_options.use_tree_sitter) return &.{};
@setEvalBranchQuota(16000); @setEvalBranchQuota(32000);
const queries_cb = @embedFile("syntax_bin_queries"); const queries_cb = @embedFile("syntax_bin_queries");
var iter: []const u8 = queries_cb; var iter: []const u8 = queries_cb;
var len = cbor.decodeMapHeader(&iter) catch |e| { var len = cbor.decodeMapHeader(&iter) catch |e| {
@ -163,6 +164,10 @@ fn load_queries() []const struct { []const u8, FileTypeQueries } {
var iter_: []const u8 = iter; var iter_: []const u8 = iter;
break :blk get_query_value_bin(&iter_, "highlights") orelse @compileError("missing highlights for " ++ lang); break :blk get_query_value_bin(&iter_, "highlights") orelse @compileError("missing highlights for " ++ lang);
}, },
.errors_bin = blk: {
var iter_: []const u8 = iter;
break :blk get_query_value_bin(&iter_, "errors") orelse @compileError("missing errors query for " ++ lang);
},
.injections_bin = blk: { .injections_bin = blk: {
var iter_: []const u8 = iter; var iter_: []const u8 = iter;
break :blk get_query_value_bin(&iter_, "injections"); break :blk get_query_value_bin(&iter_, "injections");

View file

@ -4,6 +4,8 @@ const treez = @import("treez");
pub const tss = @import("ts_serializer.zig"); pub const tss = @import("ts_serializer.zig");
const verbose = false;
pub fn main() anyerror!void { pub fn main() anyerror!void {
const allocator = std.heap.c_allocator; const allocator = std.heap.c_allocator;
const args = try std.process.argsAlloc(allocator); const args = try std.process.argsAlloc(allocator);
@ -33,7 +35,7 @@ pub fn main() anyerror!void {
const lang = file_type.lang_fn() orelse std.debug.panic("tree-sitter parser function failed for language: {s}", .{file_type.name}); const lang = file_type.lang_fn() orelse std.debug.panic("tree-sitter parser function failed for language: {s}", .{file_type.name});
try cbor.writeValue(writer, file_type.name); try cbor.writeValue(writer, file_type.name);
try cbor.writeMapHeader(writer, if (file_type.injections) |_| 2 else 1); try cbor.writeMapHeader(writer, if (file_type.injections) |_| 3 else 2);
const highlights_in = try treez.Query.create(lang, file_type.highlights); const highlights_in = try treez.Query.create(lang, file_type.highlights);
const ts_highlights_in: *tss.TSQuery = @alignCast(@ptrCast(highlights_in)); const ts_highlights_in: *tss.TSQuery = @alignCast(@ptrCast(highlights_in));
@ -43,7 +45,19 @@ pub fn main() anyerror!void {
try cbor.writeValue(writer, "highlights"); try cbor.writeValue(writer, "highlights");
try cbor.writeValue(writer, highlights_cb); try cbor.writeValue(writer, highlights_cb);
// std.log.info("file_type {s} highlights {d} bytes", .{ file_type.name, highlights_cb.len }); if (verbose)
std.log.info("file_type {s} highlights {d} bytes", .{ file_type.name, highlights_cb.len });
const errors_in = try treez.Query.create(lang, "(ERROR) @error");
const ts_errors_in: *tss.TSQuery = @alignCast(@ptrCast(errors_in));
const errors_cb = try tss.toCbor(ts_errors_in, allocator);
defer allocator.free(errors_cb);
try cbor.writeValue(writer, "errors");
try cbor.writeValue(writer, errors_cb);
if (verbose)
std.log.info("file_type {s} errors {d} bytes", .{ file_type.name, errors_cb.len });
if (file_type.injections) |injections| { if (file_type.injections) |injections| {
const injections_in = try treez.Query.create(lang, injections); const injections_in = try treez.Query.create(lang, injections);
@ -54,12 +68,14 @@ pub fn main() anyerror!void {
try cbor.writeValue(writer, "injections"); try cbor.writeValue(writer, "injections");
try cbor.writeValue(writer, injections_cb); try cbor.writeValue(writer, injections_cb);
// std.log.info("file_type {s} injections {d} bytes", .{ file_type.name, injections_cb.len }); if (verbose)
std.log.info("file_type {s} injections {d} bytes", .{ file_type.name, injections_cb.len });
} }
} }
try output_file.writeAll(output.items); try output_file.writeAll(output.items);
// std.log.info("file_types total {d} bytes", .{output.items.len}); if (verbose)
std.log.info("file_types total {d} bytes", .{output.items.len});
} }
fn fatal(comptime format: []const u8, args: anytype) noreturn { fn fatal(comptime format: []const u8, args: anytype) noreturn {