From 585f84dc7a72fbbd0d526863f5538445cb58b51f Mon Sep 17 00:00:00 2001 From: CJ van den Berg Date: Sat, 22 Mar 2025 21:56:00 +0100 Subject: [PATCH] feat: use binary tree-sitter queries created at compile time --- src/syntax/build.zig | 229 +++++++++++---------- src/syntax/build.zig.zon | 4 + src/syntax/src/QueryCache.zig | 49 ++++- src/syntax/src/file_type.zig | 79 ++++++-- src/syntax/src/ts_bin_query_gen.zig | 123 ++++++++++++ src/syntax/src/ts_serializer.zig | 295 ++++++++++++++++++++++++++++ 6 files changed, 645 insertions(+), 134 deletions(-) create mode 100644 src/syntax/src/ts_bin_query_gen.zig create mode 100644 src/syntax/src/ts_serializer.zig diff --git a/src/syntax/build.zig b/src/syntax/build.zig index f9a3105..dc99d3b 100644 --- a/src/syntax/build.zig +++ b/src/syntax/build.zig @@ -14,114 +14,129 @@ pub fn build(b: *std.Build) void { .optimize = optimize, }); - const imports: []const std.Build.Module.Import = if (use_tree_sitter) &.{ - .{ .name = "build_options", .module = options_mod }, - .{ .name = "treez", .module = tree_sitter_dep.module("treez") }, - ts_queryfile(b, tree_sitter_dep, "queries/cmake/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-agda/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-astro/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-bash/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-c-sharp/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-c/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-cpp/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-css/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-diff/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-dockerfile/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-elixir/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, 
"tree-sitter-git-rebase/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-gitcommit/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-gleam/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-go/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-fish/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-haskell/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-hare/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-html/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-java/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-javascript/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-jsdoc/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-json/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-julia/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-kdl/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-lua/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-mail/queries/mail/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-make/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-markdown/tree-sitter-markdown/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-markdown/tree-sitter-markdown-inline/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-nasm/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-nim/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-ninja/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-nix/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-nu/queries/nu/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, 
"tree-sitter-ocaml/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-odin/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-openscad/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-org/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-php/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-python/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-purescript/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-regex/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-ruby/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-rust/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-ssh-config/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-scala/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-scheme/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-superhtml/tree-sitter-superhtml/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-sql/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-swift/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-toml/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-typescript/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-typst/queries/typst/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-vim/queries/vim/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-xml/queries/dtd/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-xml/queries/xml/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-yaml/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-zig/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, 
"tree-sitter-ziggy/tree-sitter-ziggy/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-ziggy/tree-sitter-ziggy-schema/queries/highlights.scm"), - ts_queryfile(b, tree_sitter_dep, "nvim-treesitter/queries/verilog/highlights.scm"), - - ts_queryfile(b, tree_sitter_dep, "queries/cmake/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-astro/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-cpp/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-elixir/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-gitcommit/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-hare/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-html/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-javascript/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-kdl/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-lua/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-markdown/tree-sitter-markdown-inline/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-markdown/tree-sitter-markdown/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-nasm/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-nix/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-nu/queries/nu/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-odin/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-openscad/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-php/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-purescript/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-purescript/vim_queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-rust/queries/injections.scm"), - 
ts_queryfile(b, tree_sitter_dep, "tree-sitter-superhtml/tree-sitter-superhtml/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-swift/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-typst/queries/typst/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-vim/queries/vim/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "tree-sitter-zig/queries/injections.scm"), - ts_queryfile(b, tree_sitter_dep, "nvim-treesitter/queries/verilog/injections.scm"), - } else &.{ - .{ .name = "build_options", .module = options_mod }, - }; - - _ = b.addModule("syntax", .{ - .root_source_file = b.path("src/syntax.zig"), - .imports = imports, + const cbor_dep = b.dependency("cbor", .{ + .target = target, + .optimize = optimize, }); + + const ts_bin_query_gen = b.addExecutable(.{ + .name = "ts_bin_query_gen", + .target = target, + .root_source_file = b.path("src/ts_bin_query_gen.zig"), + }); + ts_bin_query_gen.linkLibC(); + ts_bin_query_gen.root_module.addImport("cbor", cbor_dep.module("cbor")); + ts_bin_query_gen.root_module.addImport("treez", tree_sitter_dep.module("treez")); + ts_bin_query_gen.root_module.addImport("build_options", options_mod); + + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "queries/cmake/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-agda/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-astro/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-bash/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-c-sharp/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-c/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-cpp/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-css/queries/highlights.scm"); + ts_queryfile(b, 
tree_sitter_dep, ts_bin_query_gen, "tree-sitter-diff/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-dockerfile/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-elixir/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-git-rebase/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-gitcommit/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-gleam/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-go/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-fish/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-haskell/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-hare/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-html/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-java/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-javascript/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-jsdoc/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-json/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-julia/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-kdl/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-lua/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-mail/queries/mail/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-make/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, 
ts_bin_query_gen, "tree-sitter-markdown/tree-sitter-markdown/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-markdown/tree-sitter-markdown-inline/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-nasm/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-nim/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-ninja/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-nix/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-nu/queries/nu/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-ocaml/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-odin/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-openscad/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-org/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-php/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-python/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-purescript/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-regex/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-ruby/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-rust/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-ssh-config/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-scala/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-scheme/queries/highlights.scm"); + 
ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-superhtml/tree-sitter-superhtml/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-sql/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-swift/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-toml/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-typescript/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-typst/queries/typst/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-vim/queries/vim/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-xml/queries/dtd/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-xml/queries/xml/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-yaml/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-zig/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-ziggy/tree-sitter-ziggy/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-ziggy/tree-sitter-ziggy-schema/queries/highlights.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "nvim-treesitter/queries/verilog/highlights.scm"); + + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "queries/cmake/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-astro/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-cpp/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-elixir/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-gitcommit/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, 
"tree-sitter-hare/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-html/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-javascript/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-kdl/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-lua/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-markdown/tree-sitter-markdown-inline/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-markdown/tree-sitter-markdown/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-nasm/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-nix/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-nu/queries/nu/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-odin/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-openscad/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-php/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-purescript/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-purescript/vim_queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-rust/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-superhtml/tree-sitter-superhtml/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-swift/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-typst/queries/typst/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, 
"tree-sitter-vim/queries/vim/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "tree-sitter-zig/queries/injections.scm"); + ts_queryfile(b, tree_sitter_dep, ts_bin_query_gen, "nvim-treesitter/queries/verilog/injections.scm"); + + const syntax_mod = b.addModule("syntax", .{ + .root_source_file = b.path("src/syntax.zig"), + .imports = &.{ + .{ .name = "build_options", .module = options_mod }, + .{ .name = "cbor", .module = cbor_dep.module("cbor") }, + .{ .name = "treez", .module = tree_sitter_dep.module("treez") }, + }, + }); + + if (use_tree_sitter) { + const ts_bin_query_gen_step = b.addRunArtifact(ts_bin_query_gen); + const output = ts_bin_query_gen_step.addOutputFileArg("bin_queries.cbor"); + syntax_mod.addAnonymousImport("syntax_bin_queries", .{ .root_source_file = output }); + } } -fn ts_queryfile(b: *std.Build, dep: *std.Build.Dependency, comptime sub_path: []const u8) std.Build.Module.Import { - return .{ - .name = sub_path, - .module = b.createModule(.{ - .root_source_file = dep.path(sub_path), - }), - }; +fn ts_queryfile(b: *std.Build, dep: *std.Build.Dependency, bin_gen: *std.Build.Step.Compile, comptime sub_path: []const u8) void { + const module = b.createModule(.{ .root_source_file = dep.path(sub_path) }); + bin_gen.root_module.addImport(sub_path, module); } diff --git a/src/syntax/build.zig.zon b/src/syntax/build.zig.zon index eb592e2..345f3d7 100644 --- a/src/syntax/build.zig.zon +++ b/src/syntax/build.zig.zon @@ -9,6 +9,10 @@ .url = "https://github.com/neurocyte/tree-sitter/releases/download/master-86dd4d2536f2748c5b4ea0e1e70678039a569aac/source.tar.gz", .hash = "N-V-__8AACablCbp-6lsRoKDEp6Xd2dHLe4AsW81blkSQxzs", }, + .cbor = .{ + .url = "https://github.com/neurocyte/cbor/archive/1fccb83c70cd84e1dff57cc53f7db8fb99909a94.tar.gz", + .hash = "cbor-1.0.0-RcQE_HvqAACcrLH7t3IDZOshgY2xqJA_UX330MvwSepb", + }, }, .paths = .{ "src", diff --git a/src/syntax/src/QueryCache.zig b/src/syntax/src/QueryCache.zig index 5c8ec26..722db31 100644 --- 
a/src/syntax/src/QueryCache.zig +++ b/src/syntax/src/QueryCache.zig @@ -8,6 +8,7 @@ else const Self = @This(); +pub const tss = @import("ts_serializer.zig"); pub const FileType = @import("file_type.zig"); const Query = treez.Query; @@ -20,8 +21,19 @@ ref_count: usize = 1, const CacheEntry = struct { mutex: ?std.Thread.Mutex, query: ?*Query, - file_type: *const FileType, + query_arena: ?*std.heap.ArenaAllocator, query_type: QueryType, + file_type: *const FileType, + + fn destroy(self: *@This(), allocator: std.mem.Allocator) void { + if (self.query_arena) |a| { + a.deinit(); + allocator.destroy(a); + } else if (self.query) |q| + q.destroy(); + self.query_arena = null; + self.query = null; + } }; pub const QueryType = enum { @@ -43,7 +55,7 @@ const CacheError = error{ OutOfMemory, }; -pub const Error = CacheError || QueryParseError; +pub const Error = CacheError || QueryParseError || QuerySerializeError; pub fn create(allocator: std.mem.Allocator, opts: struct { lock: bool = false }) !*Self { const self = try allocator.create(Self); @@ -74,13 +86,13 @@ fn release_ref_unlocked_and_maybe_destroy(self: *Self) void { var iter_highlights = self.highlights.iterator(); while (iter_highlights.next()) |p| { self.allocator.free(p.key_ptr.*); - if (p.value_ptr.*.query) |q| q.destroy(); + p.value_ptr.*.destroy(self.allocator); self.allocator.destroy(p.value_ptr.*); } var iter_injections = self.injections.iterator(); while (iter_injections.next()) |p| { self.allocator.free(p.key_ptr.*); - if (p.value_ptr.*.query) |q| q.destroy(); + p.value_ptr.*.destroy(self.allocator); self.allocator.destroy(p.value_ptr.*); } self.highlights.deinit(self.allocator); @@ -103,6 +115,7 @@ fn get_cache_entry(self: *Self, file_type: *const FileType, comptime query_type: const q = try self.allocator.create(CacheEntry); q.* = .{ .query = null, + .query_arena = null, .mutex = if (self.mutex) |_| .{} else null, .file_type = file_type, .query_type = query_type, @@ -113,16 +126,20 @@ fn get_cache_entry(self: 
*Self, file_type: *const FileType, comptime query_type: }; } -fn get_cached_query(_: *Self, entry: *CacheEntry) QueryParseError!?*Query { +fn get_cached_query(self: *Self, entry: *CacheEntry) Error!?*Query { if (entry.mutex) |*mtx| mtx.lock(); defer if (entry.mutex) |*mtx| mtx.unlock(); return if (entry.query) |query| query else blk: { const lang = entry.file_type.lang_fn() orelse std.debug.panic("tree-sitter parser function failed for language: {s}", .{entry.file_type.name}); - entry.query = try Query.create(lang, switch (entry.query_type) { - .highlights => entry.file_type.highlights, - .injections => if (entry.file_type.injections) |injections| injections else return null, - }); + const queries = FileType.queries.get(entry.file_type.name) orelse return null; + const query_bin = switch (entry.query_type) { + .highlights => queries.highlights_bin, + .injections => queries.injections_bin orelse return null, + }; + const query, const arena = try deserialize_query(query_bin, lang, self.allocator); + entry.query = query; + entry.query_arena = arena; break :blk entry.query.?; }; } @@ -158,3 +175,17 @@ pub fn release(self: *Self, query: *Query, comptime query_type: QueryType) void _ = query_type; self.release_ref_unlocked_and_maybe_destroy(); } + +pub const QuerySerializeError = (tss.SerializeError || tss.DeserializeError); + +fn deserialize_query(query_bin: []const u8, language: ?*const treez.Language, allocator: std.mem.Allocator) QuerySerializeError!struct { *Query, *std.heap.ArenaAllocator } { + std.log.warn("deserialize_query", .{}); + + var ts_query_out, const arena = try tss.fromCbor(query_bin, allocator); + std.log.warn("decoded TSQuery", .{}); + + ts_query_out.language = @intFromPtr(language); + + const query_out: *Query = @alignCast(@ptrCast(ts_query_out)); + return .{ query_out, arena }; +} diff --git a/src/syntax/src/file_type.zig b/src/syntax/src/file_type.zig index d6c4445..f8aa5f3 100644 --- a/src/syntax/src/file_type.zig +++ 
b/src/syntax/src/file_type.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const cbor = @import("cbor"); const build_options = @import("build_options"); const treez = if (build_options.use_tree_sitter) @@ -14,8 +15,6 @@ name: []const u8, description: []const u8, lang_fn: LangFn, extensions: []const []const u8, -highlights: [:0]const u8, -injections: ?[:0]const u8, first_line_matches: ?FirstLineMatch = null, comment: []const u8, formatter: ?[]const []const u8, @@ -124,22 +123,6 @@ fn load_file_types(comptime Namespace: type) []const FileType { .lang_fn = if (@hasField(@TypeOf(args), "parser")) args.parser else get_parser(lang), .extensions = vec(args.extensions), .comment = args.comment, - .highlights = if (build_options.use_tree_sitter) - if (@hasField(@TypeOf(args), "highlights")) - @embedFile(args.highlights) - else if (@hasField(@TypeOf(args), "highlights_list")) - @embedFile(args.highlights_list[0]) ++ "\n" ++ @embedFile(args.highlights_list[1]) - else - @embedFile("tree-sitter-" ++ lang ++ "/queries/highlights.scm") - else - "", - .injections = if (build_options.use_tree_sitter) - if (@hasField(@TypeOf(args), "injections")) - @embedFile(args.injections) - else - null - else - null, .first_line_matches = if (@hasField(@TypeOf(args), "first_line_matches")) args.first_line_matches else null, .formatter = if (@hasField(@TypeOf(args), "formatter")) vec(args.formatter) else null, .language_server = if (@hasField(@TypeOf(args), "language_server")) vec(args.language_server) else null, @@ -152,3 +135,63 @@ fn load_file_types(comptime Namespace: type) []const FileType { else => @compileError("expected tuple or struct type"), }; } + +pub const FileTypeQueries = struct { + highlights_bin: []const u8, + injections_bin: ?[]const u8, +}; + +pub const queries = std.static_string_map.StaticStringMap(FileTypeQueries).initComptime(load_queries()); + +fn load_queries() []const struct { []const u8, FileTypeQueries } { + if (!build_options.use_tree_sitter) return &.{}; + 
@setEvalBranchQuota(16000); + const queries_cb = @embedFile("syntax_bin_queries"); + var iter: []const u8 = queries_cb; + var len = cbor.decodeMapHeader(&iter) catch |e| { + @compileLog("cbor.decodeMapHeader", e); + @compileError("invalid syntax_bin_queries"); + }; + var construct_types: [len]struct { []const u8, FileTypeQueries } = undefined; + var i = 0; + while (len > 0) : (len -= 1) { + var lang: []const u8 = undefined; + if (!try cbor.matchString(&iter, &lang)) + @compileError("invalid language name field"); + construct_types[i] = .{ lang, .{ + .highlights_bin = blk: { + var iter_: []const u8 = iter; + break :blk get_query_value_bin(&iter_, "highlights") orelse @compileError("missing highlights for " ++ lang); + }, + .injections_bin = blk: { + var iter_: []const u8 = iter; + break :blk get_query_value_bin(&iter_, "injections"); + }, + } }; + try cbor.skipValue(&iter); + i += 1; + } + const types = construct_types; + return &types; +} + +fn get_query_value_bin(iter: *[]const u8, comptime query: []const u8) ?[]const u8 { + var len = cbor.decodeMapHeader(iter) catch |e| { + @compileLog("cbor.decodeMapHeader", e); + @compileError("invalid query map in syntax_bin_queries"); + }; + while (len > 0) : (len -= 1) { + var query_name: []const u8 = undefined; + if (!try cbor.matchString(iter, &query_name)) + @compileError("invalid query name field"); + if (std.mem.eql(u8, query_name, query)) { + var query_value: []const u8 = undefined; + if (try cbor.matchValue(iter, cbor.extract(&query_value))) + return query_value; + @compileError("invalid query value field"); + } else { + try cbor.skipValue(iter); + } + } + return null; +} diff --git a/src/syntax/src/ts_bin_query_gen.zig b/src/syntax/src/ts_bin_query_gen.zig new file mode 100644 index 0000000..8dc4bed --- /dev/null +++ b/src/syntax/src/ts_bin_query_gen.zig @@ -0,0 +1,123 @@ +const std = @import("std"); +const cbor = @import("cbor"); +const treez = @import("treez"); + +pub const tss = @import("ts_serializer.zig"); + 
+pub fn main() anyerror!void { + const allocator = std.heap.c_allocator; + const args = try std.process.argsAlloc(allocator); + + var opt_output_file_path: ?[]const u8 = null; + + var i: usize = 1; + while (i < args.len) : (i += 1) { + const arg = args[i]; + if (opt_output_file_path != null) fatal("duplicated {s} argument", .{arg}); + opt_output_file_path = args[i]; + } + + const output_file_path = opt_output_file_path orelse fatal("missing output file", .{}); + var output_file = std.fs.cwd().createFile(output_file_path, .{}) catch |err| { + fatal("unable to open '{s}': {s}", .{ output_file_path, @errorName(err) }); + }; + defer output_file.close(); + + var output = std.ArrayList(u8).init(allocator); + defer output.deinit(); + const writer = output.writer(); + + try cbor.writeMapHeader(writer, file_types.len); + + for (file_types) |file_type| { + const lang = file_type.lang_fn() orelse std.debug.panic("tree-sitter parser function failed for language: {s}", .{file_type.name}); + + try cbor.writeValue(writer, file_type.name); + try cbor.writeMapHeader(writer, if (file_type.injections) |_| 2 else 1); + + const highlights_in = try treez.Query.create(lang, file_type.highlights); + const ts_highlights_in: *tss.TSQuery = @alignCast(@ptrCast(highlights_in)); + + const highlights_cb = try tss.toCbor(ts_highlights_in, allocator); + defer allocator.free(highlights_cb); + + try cbor.writeValue(writer, "highlights"); + try cbor.writeValue(writer, highlights_cb); + std.log.info("file_type {s} highlights {d} bytes", .{ file_type.name, highlights_cb.len }); + + if (file_type.injections) |injections| { + const injections_in = try treez.Query.create(lang, injections); + const ts_injections_in: *tss.TSQuery = @alignCast(@ptrCast(injections_in)); + + const injections_cb = try tss.toCbor(ts_injections_in, allocator); + defer allocator.free(injections_cb); + + try cbor.writeValue(writer, "injections"); + try cbor.writeValue(writer, injections_cb); + std.log.info("file_type {s} 
injections {d} bytes", .{ file_type.name, injections_cb.len }); + } + } + + try output_file.writeAll(output.items); +} + +fn fatal(comptime format: []const u8, args: anytype) noreturn { + std.debug.print(format, args); + std.process.exit(1); +} + +pub const file_types = load_file_types(@import("file_types.zig")); + +const FileType = struct { + name: []const u8, + lang_fn: LangFn, + highlights: [:0]const u8, + injections: ?[:0]const u8, +}; +const LangFn = *const fn () callconv(.C) ?*const treez.Language; + +fn load_file_types(comptime Namespace: type) []const FileType { + comptime switch (@typeInfo(Namespace)) { + .@"struct" => |info| { + var count = 0; + for (info.decls) |_| count += 1; + var construct_types: [count]FileType = undefined; + var i = 0; + for (info.decls) |decl| { + const lang = decl.name; + const args = @field(Namespace, lang); + construct_types[i] = .{ + .name = lang, + .lang_fn = if (@hasField(@TypeOf(args), "parser")) args.parser else get_parser(lang), + .highlights = if (@hasField(@TypeOf(args), "highlights")) + @embedFile(args.highlights) + else if (@hasField(@TypeOf(args), "highlights_list")) + @embedFile(args.highlights_list[0]) ++ "\n" ++ @embedFile(args.highlights_list[1]) + else + @embedFile("tree-sitter-" ++ lang ++ "/queries/highlights.scm"), + .injections = if (@hasField(@TypeOf(args), "injections")) + @embedFile(args.injections) + else + null, + }; + i += 1; + } + const types = construct_types; + return &types; + }, + else => @compileError("expected tuple or struct type"), + }; +} + +fn get_parser(comptime lang: []const u8) LangFn { + const language_name = ft_func_name(lang); + return @extern(?LangFn, .{ .name = "tree_sitter_" ++ language_name }) orelse @compileError(std.fmt.comptimePrint("Cannot find extern tree_sitter_{s}", .{language_name})); +} + +fn ft_func_name(comptime lang: []const u8) []const u8 { + var transform: [lang.len]u8 = undefined; + for (lang, 0..) 
|c, i|
+        transform[i] = if (c == '-') '_' else c;
+    const func_name = transform;
+    return &func_name;
+}
diff --git a/src/syntax/src/ts_serializer.zig b/src/syntax/src/ts_serializer.zig
new file mode 100644
index 0000000..973c1f7
--- /dev/null
+++ b/src/syntax/src/ts_serializer.zig
@@ -0,0 +1,342 @@
+//! This file *MUST* be kept in sync with tree-sitter/lib/src/query.c
+//! It exactly represents the C structures in memory and must produce
+//! the exact same results as the C tree-sitter library version used.
+//!
+//! Yes... it is not a public API! Here be dragons!
+
+const std = @import("std");
+const cbor = @import("cbor");
+const build_options = @import("build_options");
+const treez = if (build_options.use_tree_sitter) @import("treez") else @import("treez_dummy.zig");
+
+/// Offset/length pair, used by `SymbolTable.slices` and by the `steps` and
+/// `predicate_steps` fields of `QueryPattern`.
+pub const Slice = extern struct {
+    offset: u32,
+    length: u32,
+
+    /// Serialize this struct with `cbor.writeArray`.
+    pub fn cborEncode(self: *const @This(), writer: anytype) !void {
+        return cbor.writeArray(writer, self.*);
+    }
+
+    /// Read `offset` then `length` from `iter`.
+    pub fn cborExtract(self: *@This(), iter: *[]const u8) cbor.Error!bool {
+        return cbor.matchValue(iter, .{
+            cbor.extract(&self.offset),
+            cbor.extract(&self.length),
+        });
+    }
+};
+
+/// Pointer/size/capacity triple holding elements of type `T`.
+pub fn Array(T: type) type {
+    return extern struct {
+        contents: ?*T,
+        size: u32,
+        capacity: u32,
+
+        /// Write the first `size` elements as one CBOR value, or CBOR null
+        /// when `contents` is null.
+        pub fn cborEncode(self: *const @This(), writer: anytype) !void {
+            if (self.contents) |contents| {
+                const arr: []T = @as([*]T, @ptrCast(contents))[0..self.size];
+                try cbor.writeValue(writer, arr);
+                return;
+            }
+            try cbor.writeValue(writer, null);
+        }
+
+        /// Decode what `cborEncode` wrote. Element storage is taken from
+        /// `allocator` and is not released here when decoding fails, so pass
+        /// an arena (as `fromCbor` does). Decoder errors are propagated.
+        pub fn cborExtract(self: *@This(), iter: *[]const u8, allocator: std.mem.Allocator) cbor.Error!bool {
+            // Probe for null on a copy so `iter` only advances on a match.
+            var iter_ = iter.*;
+            if (cbor.matchValue(&iter_, cbor.null_) catch false) {
+                iter.* = iter_;
+                self.contents = null;
+                self.size = 0;
+                self.capacity = 0;
+                return true;
+            }
+
+            if (T == u8) {
+                // NOTE(review): the bytes are not copied with `allocator`;
+                // presumably `arr` aliases the input buffer, which would then
+                // have to outlive this array. Confirm against the cbor module.
+                var arr: []const u8 = undefined;
+                if (try cbor.matchValue(iter, cbor.extract(&arr))) {
+                    self.contents = @constCast(@ptrCast(arr.ptr));
+                    self.size = @intCast(arr.len);
+                    self.capacity = @intCast(arr.len);
+                    return true;
+                }
+                return false;
+            }
+
+            const n = try cbor.decodeArrayHeader(iter);
+            const arr: []T = try allocator.alloc(T, n);
+            for (arr) |*item| {
+                // `try` rather than `catch return false`: OutOfMemory and
+                // malformed input must not be reported as a plain mismatch.
+                if (comptime cbor.isExtractableAlloc(T)) {
+                    if (!try cbor.matchValue(iter, cbor.extractAlloc(item, allocator)))
+                        return false;
+                } else {
+                    if (!try cbor.matchValue(iter, cbor.extract(item)))
+                        return false;
+                }
+            }
+            self.contents = @constCast(@ptrCast(arr.ptr));
+            self.size = @intCast(arr.len);
+            self.capacity = @intCast(arr.len);
+            return true;
+        }
+    };
+}
+
+/// Pair of arrays: `characters` bytes and `slices` (presumably ranges into
+/// `characters`; confirm against query.c).
+pub const SymbolTable = extern struct {
+    characters: Array(u8),
+    slices: Array(Slice),
+
+    /// Serialize this struct with `cbor.writeArray`.
+    pub fn cborEncode(self: *const @This(), writer: anytype) !void {
+        return cbor.writeArray(writer, self.*);
+    }
+
+    /// Read `characters` then `slices`, allocating from `allocator`.
+    pub fn cborExtract(self: *@This(), iter: *[]const u8, allocator: std.mem.Allocator) cbor.Error!bool {
+        return cbor.matchValue(iter, .{
+            cbor.extractAlloc(&self.characters, allocator),
+            cbor.extractAlloc(&self.slices, allocator),
+        });
+    }
+};
+
+/// Element type of `TSQuery.capture_quantifiers`.
+pub const CaptureQuantifiers = Array(u8);
+
+pub const PatternEntry = extern struct {
+    step_index: u16,
+    pattern_index: u16,
+    is_rooted: bool,
+
+    /// Serialize this struct with `cbor.writeArray`.
+    pub fn cborEncode(self: *const @This(), writer: anytype) !void {
+        return cbor.writeArray(writer, self.*);
+    }
+
+    /// Read the fields from `iter` in declaration order.
+    pub fn cborExtract(self: *@This(), iter: *[]const u8) cbor.Error!bool {
+        return cbor.matchValue(iter, .{
+            cbor.extract(&self.step_index),
+            cbor.extract(&self.pattern_index),
+            cbor.extract(&self.is_rooted),
+        });
+    }
+};
+
+pub const QueryPattern = extern struct {
+    steps: Slice,
+    predicate_steps: Slice,
+    start_byte: u32,
+    end_byte: u32,
+    is_non_local: bool,
+
+    /// Serialize this struct with `cbor.writeArray`.
+    pub fn cborEncode(self: *const @This(), writer: anytype) !void {
+        return cbor.writeArray(writer, self.*);
+    }
+
+    /// Read the fields from `iter` in declaration order.
+    pub fn cborExtract(self: *@This(), iter: *[]const u8, allocator: std.mem.Allocator) cbor.Error!bool {
+        return cbor.matchValue(iter, .{
+            cbor.extractAlloc(&self.steps, allocator),
+            cbor.extractAlloc(&self.predicate_steps, allocator),
+            cbor.extract(&self.start_byte),
+            cbor.extract(&self.end_byte),
+            cbor.extract(&self.is_non_local),
+        });
+    }
+};
+
+pub const StepOffset = extern struct {
+    byte_offset: u32,
+    step_index: u16,
+
+    /// Serialize this struct with `cbor.writeArray`.
+    pub fn cborEncode(self: *const @This(), writer: anytype) !void {
+        return cbor.writeArray(writer, self.*);
+    }
+
+    /// Read the fields from `iter` in declaration order.
+    pub fn cborExtract(self: *@This(), iter: *[]const u8) cbor.Error!bool {
+        return cbor.matchValue(iter, .{
+            cbor.extract(&self.byte_offset),
+            cbor.extract(&self.step_index),
+        });
+    }
+};
+
+pub const MAX_STEP_CAPTURE_COUNT = 3;
+
+pub const TSSymbol = u16;
+pub const TSFieldId = u16;
+
+pub const QueryStep = extern struct {
+    symbol: TSSymbol,
+    supertype_symbol: TSSymbol,
+    field: TSFieldId,
+    capture_ids: [MAX_STEP_CAPTURE_COUNT]u16,
+    depth: u16,
+    alternative_index: u16,
+    negated_field_list_id: u16,
+    // is_named: u1,
+    // is_immediate: u1,
+    // is_last_child: u1,
+    // is_pass_through: u1,
+    // is_dead_end: u1,
+    // alternative_is_immediate: u1,
+    // contains_captures: u1,
+    // root_pattern_guaranteed: u1,
+    flags8: u8,
+    // parent_pattern_guaranteed: u1,
+    flags16: u8,
+
+    /// Serialize this struct with `cbor.writeArray`.
+    pub fn cborEncode(self: *const @This(), writer: anytype) !void {
+        return cbor.writeArray(writer, self.*);
+    }
+
+    /// Read the fields from `iter` in declaration order.
+    pub fn cborExtract(self: *@This(), iter: *[]const u8) cbor.Error!bool {
+        return cbor.matchValue(iter, .{
+            cbor.extract(&self.symbol),
+            cbor.extract(&self.supertype_symbol),
+            cbor.extract(&self.field),
+            cbor.extract(&self.capture_ids),
+            cbor.extract(&self.depth),
+            cbor.extract(&self.alternative_index),
+            cbor.extract(&self.negated_field_list_id),
+            cbor.extract(&self.flags8),
+            cbor.extract(&self.flags16),
+        });
+    }
+};
+
+pub const PredicateStep = extern struct {
+    pub const Type = enum(c_uint) {
+        done,
+        capture,
+        string,
+    };
+
+    type: Type,
+    value_id: u32,
+
+    /// Serialize this struct with `cbor.writeArray`.
+    pub fn cborEncode(self: *const @This(), writer: anytype) !void {
+        return cbor.writeArray(writer, self.*);
+    }
+
+    /// Read `type` then `value_id` from `iter`.
+    pub fn cborExtract(self: *@This(), iter: *[]const u8) cbor.Error!bool {
+        return cbor.matchValue(iter, .{
+            cbor.extract(&self.type),
+            cbor.extract(&self.value_id),
+        });
+    }
+};
+
+/// Top-level query structure handled by `toCbor` and `fromCbor`.
+pub const TSQuery = extern struct {
+    captures: SymbolTable,
+    predicate_values: SymbolTable,
+    capture_quantifiers: Array(CaptureQuantifiers),
+    steps: Array(QueryStep),
+    pattern_map: Array(PatternEntry),
+    predicate_steps: Array(PredicateStep),
+    patterns: Array(QueryPattern),
+    step_offsets: Array(StepOffset),
+    negated_fields: Array(TSFieldId),
+    string_buffer: Array(u8),
+    repeat_symbols_with_rootless_patterns: Array(TSSymbol),
+    language: usize,
+    // language: ?*const treez.Language,
+    wildcard_root_pattern_count: u16,
+
+    /// Serialize this struct with `cbor.writeArray`.
+    pub fn cborEncode(self: *const @This(), writer: anytype) !void {
+        return cbor.writeArray(writer, self.*);
+    }
+
+    /// Read every field from `iter` in declaration order, allocating array
+    /// storage from `allocator`. `language` is reset to 0 afterwards, so the
+    /// serialized value is never kept.
+    pub fn cborExtract(self: *@This(), iter: *[]const u8, allocator: std.mem.Allocator) cbor.Error!bool {
+        const result = cbor.matchValue(iter, .{
+            cbor.extractAlloc(&self.captures, allocator),
+            cbor.extractAlloc(&self.predicate_values, allocator),
+            cbor.extractAlloc(&self.capture_quantifiers, allocator),
+            cbor.extractAlloc(&self.steps, allocator),
+            cbor.extractAlloc(&self.pattern_map, allocator),
+            cbor.extractAlloc(&self.predicate_steps, allocator),
+            cbor.extractAlloc(&self.patterns, allocator),
+            cbor.extractAlloc(&self.step_offsets, allocator),
+            cbor.extractAlloc(&self.negated_fields, allocator),
+            cbor.extractAlloc(&self.string_buffer, allocator),
+            cbor.extractAlloc(&self.repeat_symbols_with_rootless_patterns, allocator),
+            cbor.extract(&self.language),
+            cbor.extract(&self.wildcard_root_pattern_count),
+        });
+        self.language = 0;
+        return result;
+    }
+};
+
+pub const SerializeError = error{OutOfMemory};
+
+/// Serialize `query` to CBOR. The caller owns the returned slice and frees it
+/// with `allocator`.
+pub fn toCbor(query: *TSQuery, allocator: std.mem.Allocator) SerializeError![]const u8 {
+    var cb: std.ArrayListUnmanaged(u8) = .empty;
+    // Only an error path still holds the buffer; on success it is handed out.
+    errdefer cb.deinit(allocator);
+    try cbor.writeValue(cb.writer(allocator), query.*);
+    return cb.toOwnedSlice(allocator);
+}
+
+pub const DeserializeError = error{
+    OutOfMemory,
+    IntegerTooLarge,
+    IntegerTooSmall,
+    InvalidType,
+    TooShort,
+    InvalidFloatType,
+    InvalidArrayType,
+    InvalidPIntType,
+    JsonIncompatibleType,
+    InvalidQueryCbor,
+    NotAnObject,
+};
+
+/// Decode a query written by `toCbor`. The query and its arrays live in the
+/// returned arena, which is itself created with `allocator`: release with
+/// `arena.deinit()` followed by `allocator.destroy(arena)`.
+/// Returns `error.InvalidQueryCbor` when `cb` does not match the layout.
+pub fn fromCbor(cb: []const u8, allocator: std.mem.Allocator) DeserializeError!struct { *TSQuery, *std.heap.ArenaAllocator } {
+    const arena = try allocator.create(std.heap.ArenaAllocator);
+    // Without this the arena struct itself leaked on every error return.
+    errdefer allocator.destroy(arena);
+    arena.* = std.heap.ArenaAllocator.init(allocator);
+    errdefer arena.deinit();
+    const query = try arena.allocator().create(TSQuery);
+    query.* = undefined;
+    var iter: []const u8 = cb;
+    if (!try cbor.matchValue(&iter, cbor.extractAlloc(query, arena.allocator())))
+        return error.InvalidQueryCbor;
+    return .{ query, arena };
+}