Compile new shell parser by default (#16660)

* Move radare2-shell-parser/tree-sitter into shlr
* compile new shell parser by default, available with cfg.newshell variable
* Add README for radare2-shell-parser
* Improve CI
* Add gitattributes file
2020-04-21 13:44:30 +02:00 · 2020-04-21 13:44:30 +02:00 · 06ab29b93c
parent 1b56d63df2
commit 06ab29b93c
84 changed files with 69756 additions and 228 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -0,0 +1,3 @@
+shlr/radare2-shell-parser/src/grammar.json linguist-generated=true
+shlr/radare2-shell-parser/src/node-types.json linguist-generated=true
+shlr/radare2-shell-parser/src/parser.c linguist-generated=true
--- a/.github/workflows/newshell-treesitter-tests.yml
+++ b/.github/workflows/newshell-treesitter-tests.yml
@ -0,0 +1,26 @@
+name: Radare2 CI tree-sitter test
+
+on:  # only pull requests against master that touch the paths listed below
+  pull_request:
+    paths:
+      - 'shlr/tree-sitter/**'  # '**' also matches nested files; a single '*' stops at '/'
+      - 'shlr/radare2-shell-parser/**'
+    branches:
+      - master
+
+jobs:
+  build:
+    name: radare2-shell-parser-tests
+    runs-on: ubuntu-latest
+    steps:  # checkout, Node 12 setup, npm install, then regenerate and test the grammar
+    - uses: actions/checkout@v2
+    - uses: actions/setup-node@v1
+      with:
+        node-version: '12.x'
+    - run: cd shlr/radare2-shell-parser/ && npm install
+    - name: Run tests
+      run: |
+        cd shlr/radare2-shell-parser
+        export PATH=${PATH}:./node_modules/.bin
+        tree-sitter generate
+        tree-sitter test
--- a/.github/workflows/newshell.yml
+++ b/.github/workflows/newshell.yml
@ -1,58 +0,0 @@
-name: Radare2 CI newshell
-
-on:
-  pull_request:
-    branches:
-    - master
-  push:
-    branches:
-    - master
-
-jobs:
-  build:
-    name: ${{ matrix.name }}
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        name: [linux-meson-newshell]
-        include:
-          - name: linux-meson-newshell
-            os: ubuntu-latest
-            build_system: meson
-            compiler: gcc
-            meson_options: -Duse_treesitter=true
-            newshell: newshell
-
-    steps:
-    - uses: actions/checkout@v2
-    - name: Install meson and ninja
-      run: sudo apt-get --assume-yes install python3-wheel python3-setuptools && pip3 install --user meson ninja
-    - name: Checkout our Testsuite Binaries
-      uses: actions/checkout@v2
-      with:
-          repository: radareorg/radare2-testbins
-          path: test/bins
-    - name: Build with Meson
-      run: |
-        export PATH=${HOME}/.local/bin:${PATH}
-        meson ${{ matrix.meson_options }} --prefix=${HOME} build && ninja -C build
-      env:
-        CC: ${{ matrix.compiler }}
-    - name: Install with meson
-      run: |
-        # Install the radare2
-        export PATH=${HOME}/bin:${HOME}/.local/bin:${PATH}
-        export LD_LIBRARY_PATH=${HOME}/lib/$(uname -m)-linux-gnu:${HOME}/lib:${HOME}/lib64:${LD_LIBRARY_PATH}
-        export PKG_CONFIG_PATH=${HOME}/lib/pkgconfig:${HOME}/lib/$(uname -m)-linux-gnu/pkgconfig:${PKG_CONFIG_PATH}
-        ninja -C build install
-    - name: Run tests
-      if: startswith(github.event.pull_request.head.ref, 'newshell-') || github.event_name == 'push'
-      run: |
-        # Running the test suite
-        export PATH=${HOME}/bin:${HOME}/.local/bin:${PATH}
-        export LD_LIBRARY_PATH=${HOME}/lib/$(uname -m)-linux-gnu:${HOME}/lib:${HOME}/lib64:${LD_LIBRARY_PATH}
-        export PKG_CONFIG_PATH=${HOME}/lib/pkgconfig:${HOME}/lib/$(uname -m)-linux-gnu/pkgconfig:${PKG_CONFIG_PATH}
-        export R2_CFG_NEWSHELL=1
-        cd test
-        make
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@ -11,7 +11,7 @@ jobs:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        name: [linux-acr-gcc-tests, linux-acr-clang-build, linux-meson-gcc-build, macos-clang-tests]
+        name: [linux-acr-gcc-tests, linux-acr-clang-build, linux-meson-gcc-build, linux-meson-gcc-newshell-tests, macos-clang-tests]
        include:
          - name: linux-acr-gcc-tests
            os: ubuntu-latest
@ -26,6 +26,12 @@ jobs:
            os: ubuntu-latest
            build_system: meson
            compiler: gcc
+          - name: linux-meson-gcc-newshell-tests
+            os: ubuntu-latest
+            build_system: meson
+            compiler: gcc
+            run_tests: true
+            newshell: true
          - name: macos-clang-tests
            os: macos-latest
            build_system: acr
@ -83,6 +89,12 @@ jobs:
        export PATH=${HOME}/bin:${HOME}/.local/bin:${PATH}
        export LD_LIBRARY_PATH=${HOME}/lib/$(uname -m)-linux-gnu:${HOME}/lib:${HOME}/lib64:${LD_LIBRARY_PATH}
        export PKG_CONFIG_PATH=${HOME}/lib/pkgconfig:${HOME}/lib/$(uname -m)-linux-gnu/pkgconfig:${PKG_CONFIG_PATH}
+        if [ "$NEWSHELL" == "true" ]; then
+          export R2_CFG_NEWSHELL=1
+        fi
        cd test
+        radare2 -N -Qc 'e cfg.newshell' -
        make
+      env:
+        NEWSHELL: ${{ matrix.newshell }}

--- a/.gitignore
+++ b/.gitignore
@ -58,8 +58,6 @@ libr/include/r_userconf.h
 libr/include/r_version.h
 libr/include/r_version.h.tmp
 shlr/capstone/
-shlr/tree-sitter/
-shlr/radare2-shell-parser/
 shlr/java/out
 shlr/java/out.exe
 shlr/sdb/sdb
--- a/configure
+++ b/configure
@ -22,7 +22,6 @@ DEBUGGER=1
 USE_MAGIC=0
 CSNEXT=0
 LOADLIBS=1
-USE_TREESITTER=0
 HAVE_FORK=1
 WANT_PTRACE_WRAP=1
 WITH_LIBR=0
@ -168,13 +167,11 @@ System types:
  --target=TARGET        configure for building compilers for TARGET [HOST]
 EOF2

-printf "
-Optional Features:
+printf "\nOptional Features:
  --disable-debugger     disable native debugger features
  --with-sysmagic        force to use system's magic
  --with-capstone5       build next branch of the capstone repository
  --disable-loadlibs     disable loading plugins
-  --with-shell-parser    Compile with radare2-shell-parser experimental support
  --without-fork         disable fork
  --without-ptrace-wrap  disable ptrace-wrap build
  --with-libr            build libr.a and libr.dylib
@ -189,10 +186,8 @@ Optional Features:
  --with-ostype          Choose OS type ( gnulinux windows darwin haiku ) (USEROSTYPE=auto)
  --with-libversion      specify different libversion (LIBVERSION=xxx)
  --without-jemalloc     build without jemalloc
-  --with-checks-level    value between 0 and 3 to enable different level of assert (see R_CHECKS_LEVEL) (R_CHECKS_LEVEL=2)
-"
-printf "
-Some influential environment variables:
+  --with-checks-level    value between 0 and 3 to enable different level of assert (see R_CHECKS_LEVEL) (R_CHECKS_LEVEL=2)\n"
+printf "\nSome influential environment variables:
  CC          C compiler command
  CFLAGS      C compiler flags
  CPPFLAGS    C preprocessor flags
@ -200,10 +195,8 @@ Some influential environment variables:
              nonstandard directory <lib dir>
  CPPFLAGS    C/C++ preprocessor flags, e.g. -I<include dir> if you have
              headers in a nonstandard directory <include dir>
-  CPP         C preprocessor
-"
-printf "
-Report bugs to: pancake <pancake@nopcode.org>"
+  CPP         C preprocessor\n"
+printf "\nReport bugs to: pancake <pancake@nopcode.org>"
 echo ""
 exit 0
 }
@ -245,7 +238,7 @@ echo "LANGS:     c"
 echo "REQUIRED:  libdl"
 echo "OPTIONAL:  libmagic libz libzip libxxhash libssl liblibuv>=1.0.0"
 echo "PKG-CONFIG:  capstone openssl libuv"
-echo "FLAGS:     --disable-debugger --with-sysmagic --with-capstone5 --disable-loadlibs --with-shell-parser --without-fork --without-ptrace-wrap --with-libr --with-syscapstone --with-syszip --with-sysxxhash --without-gpl --with-openssl --without-libuv --with-rpath --with-compiler=gcc --with-ostype=auto --with-libversion=xxx --without-jemalloc --with-checks-level=2"
+echo "FLAGS:     --disable-debugger --with-sysmagic --with-capstone5 --disable-loadlibs --without-fork --without-ptrace-wrap --with-libr --with-syscapstone --with-syszip --with-sysxxhash --without-gpl --with-openssl --without-libuv --with-rpath --with-compiler=gcc --with-ostype=auto --with-libversion=xxx --without-jemalloc --with-checks-level=2"
 	exit 0
 	;;
 --cache-file)
@ -295,7 +288,6 @@ echo "FLAGS:     --disable-debugger --with-sysmagic --with-capstone5 --disable-l
 "--with-sysmagic") USE_MAGIC="1"; ;;
 "--with-capstone5") CSNEXT="1"; ;;
 "--disable-loadlibs") LOADLIBS="0"; ;;
-"--with-shell-parser") USE_TREESITTER="1"; ;;
 "--without-fork") HAVE_FORK="0"; ;;
 "--without-ptrace-wrap") WANT_PTRACE_WRAP="0"; ;;
 "--with-libr") WITH_LIBR="1"; ;;
@ -328,7 +320,7 @@ parse_options "$1"
 shift
 done

-ENVWORDS="MANDIR INFODIR LIBDIR INCLUDEDIR LOCALSTATEDIR SYSCONFDIR DATADIR DOCDIR LIBEXECDIR SBINDIR BINDIR EPREFIX PREFIX SPREFIX TARGET HOST BUILD INSTALL INSTALL_LIB INSTALL_MAN INSTALL_PROGRAM INSTALL_PROGRAM_STRIP INSTALL_DIR INSTALL_SCRIPT INSTALL_DATA HOST_OS HOST_CPU BUILD_OS BUILD_CPU TARGET_OS TARGET_CPU VERSION VERSION_MAJOR VERSION_MINOR VERSION_PATCH VERSION_NUMBER PKGNAME VPATH CONTACT CONTACT_NAME CONTACT_MAIL CC CFLAGS CPPFLAGS LDFLAGS HAVE_LANG_C DEBUGGER HAVE_LIB_DL DL_LIBS HAVE_PATCH PATCH HAVE_GIT GIT HAVE_LIB_MAGIC USE_MAGIC USE_LIB_MAGIC LIBMAGIC CSNEXT LOADLIBS USE_TREESITTER HAVE_FORK WANT_PTRACE_WRAP WITH_LIBR WITH_CAPSTONE CAPSTONE_CFLAGS CAPSTONE_LDFLAGS HAVE_PKGCFG_CAPSTONE USE_CAPSTONE HAVE_LIB_Z HAVE_LIB_ZIP USE_ZIP USE_LIB_ZIP LIBZIP HAVE_LIB_XXHASH USE_XXHASH USE_LIB_XXHASH LIBXXHASH WITH_GPL HAVE_DECL_ADDR_NO_RANDOMIZE HAVE_ARC4RANDOM_UNIFORM HAVE_EXPLICIT_BZERO HAVE_EXPLICIT_MEMSET HAVE_CLOCK_NANOSLEEP HAVE_SIGACTION HAVE_LIB_GMP HAVE_LIB_SSL SSL_CFLAGS SSL_LDFLAGS HAVE_PKGCFG_OPENSSL HAVE_OPENSSL WANT_OPENSSL HAVE_LIBUV_VERSION_1_0_0 LIBUV_CFLAGS LIBUV_LDFLAGS HAVE_PKGCFG_LIBUV HAVE_LIBUV WANT_LIBUV USE_RPATH USERCC USEROSTYPE LIBVERSION HAVE_JEMALLOC HAVE_PTRACE USE_PTRACE_WRAP R_CHECKS_LEVEL"
+ENVWORDS="MANDIR INFODIR LIBDIR INCLUDEDIR LOCALSTATEDIR SYSCONFDIR DATADIR DOCDIR LIBEXECDIR SBINDIR BINDIR EPREFIX PREFIX SPREFIX TARGET HOST BUILD INSTALL INSTALL_LIB INSTALL_MAN INSTALL_PROGRAM INSTALL_PROGRAM_STRIP INSTALL_DIR INSTALL_SCRIPT INSTALL_DATA HOST_OS HOST_CPU BUILD_OS BUILD_CPU TARGET_OS TARGET_CPU VERSION VERSION_MAJOR VERSION_MINOR VERSION_PATCH VERSION_NUMBER PKGNAME VPATH CONTACT CONTACT_NAME CONTACT_MAIL CC CFLAGS CPPFLAGS LDFLAGS HAVE_LANG_C DEBUGGER HAVE_LIB_DL DL_LIBS HAVE_PATCH PATCH HAVE_GIT GIT HAVE_LIB_MAGIC USE_MAGIC USE_LIB_MAGIC LIBMAGIC CSNEXT LOADLIBS HAVE_FORK WANT_PTRACE_WRAP WITH_LIBR WITH_CAPSTONE CAPSTONE_CFLAGS CAPSTONE_LDFLAGS HAVE_PKGCFG_CAPSTONE USE_CAPSTONE HAVE_LIB_Z HAVE_LIB_ZIP USE_ZIP USE_LIB_ZIP LIBZIP HAVE_LIB_XXHASH USE_XXHASH USE_LIB_XXHASH LIBXXHASH WITH_GPL HAVE_DECL_ADDR_NO_RANDOMIZE HAVE_ARC4RANDOM_UNIFORM HAVE_EXPLICIT_BZERO HAVE_EXPLICIT_MEMSET HAVE_CLOCK_NANOSLEEP HAVE_SIGACTION HAVE_LIB_GMP HAVE_LIB_SSL SSL_CFLAGS SSL_LDFLAGS HAVE_PKGCFG_OPENSSL HAVE_OPENSSL WANT_OPENSSL HAVE_LIBUV_VERSION_1_0_0 LIBUV_CFLAGS LIBUV_LDFLAGS HAVE_PKGCFG_LIBUV HAVE_LIBUV WANT_LIBUV USE_RPATH USERCC USEROSTYPE LIBVERSION HAVE_JEMALLOC HAVE_PTRACE USE_PTRACE_WRAP R_CHECKS_LEVEL"

 create_environ

@ -696,7 +688,7 @@ done
 do_remove
 echo
 echo "Final report:"
-for A in  R_CHECKS_LEVEL PREFIX HAVE_LIB_GMP HAVE_OPENSSL HAVE_LIBUV USE_CAPSTONE HAVE_PTRACE USE_PTRACE_WRAP HAVE_FORK USE_TREESITTER VERSION USE_LIB_ZIP USE_LIB_MAGIC USE_LIB_XXHASH DEBUGGER CC USERCC HAVE_ARC4RANDOM_UNIFORM HAVE_EXPLICIT_BZERO HAVE_EXPLICIT_MEMSET USEROSTYPE LIBVERSION BUILD HOST TARGET ; do # REPORT
+for A in  R_CHECKS_LEVEL PREFIX HAVE_LIB_GMP HAVE_OPENSSL HAVE_LIBUV USE_CAPSTONE HAVE_PTRACE USE_PTRACE_WRAP HAVE_FORK VERSION USE_LIB_ZIP USE_LIB_MAGIC USE_LIB_XXHASH DEBUGGER CC USERCC HAVE_ARC4RANDOM_UNIFORM HAVE_EXPLICIT_BZERO HAVE_EXPLICIT_MEMSET USEROSTYPE LIBVERSION BUILD HOST TARGET ; do # REPORT
 eval VAL="\$${A}"
 [ -z "${VAL}" ] && VAL="(null)"
 echo " - ${A} = ${VAL}"
--- a/configure.acr
+++ b/configure.acr
@ -26,8 +26,6 @@ ARG_WITH CSNEXT capstone5 build next branch of the capstone repository ;
 (( useful for static builds . see sys/static.sh ))
 ARG_DISABLE LOADLIBS loadlibs disable loading plugins ;

-ARG_WITH USE_TREESITTER shell-parser Compile with radare2-shell-parser experimental support ;
-
 ARG_WITHOUT HAVE_FORK fork disable fork ;
 ARG_WITHOUT WANT_PTRACE_WRAP ptrace-wrap disable ptrace-wrap build ;

@ -216,7 +214,7 @@ IFEQ WANT_PTRACE_WRAP 0 ; {
 ARG_WITH R_CHECKS_LEVEL=2 checks-level value between 0 and 3 to enable different level of assert (see R_CHECKS_LEVEL) ;

 REPORT R_CHECKS_LEVEL PREFIX HAVE_LIB_GMP HAVE_OPENSSL HAVE_LIBUV USE_CAPSTONE HAVE_PTRACE USE_PTRACE_WRAP HAVE_FORK
-	USE_TREESITTER VERSION USE_LIB_ZIP USE_LIB_MAGIC USE_LIB_XXHASH DEBUGGER CC USERCC HAVE_ARC4RANDOM_UNIFORM
+	VERSION USE_LIB_ZIP USE_LIB_MAGIC USE_LIB_XXHASH DEBUGGER CC USERCC HAVE_ARC4RANDOM_UNIFORM
 	HAVE_EXPLICIT_BZERO HAVE_EXPLICIT_MEMSET USEROSTYPE LIBVERSION BUILD HOST TARGET ;

 (( TODO: Add the rest of .pc files here.. add a rule for acr? ))
--- a/libr/core/Makefile
+++ b/libr/core/Makefile
@ -13,9 +13,7 @@ OBJS+=vmenus.o vmenus_graph.o vmenus_zigns.o zdiff.o citem.o
 OBJS+=task.o panels.o pseudo.o vmarks.o anal_tp.o anal_objc.o blaze.o cundo.o

 CFLAGS+=-I../../shlr/heap/include
-ifeq ($(USE_TREESITTER),1)
 CFLAGS+=-I../../shlr/tree-sitter/lib/include -I../../shlr/radare2-shell-parser/src/tree_parser
-endif
 CFLAGS+=-DR2_PLUGIN_INCORE -I../../shlr
 LDFLAGS+=${DL_LIBS}

@ -43,9 +41,7 @@ OBJS+=$(STATIC_OBJS)
 #STATIC_OBJS=$(subst ..,p/..,$(subst core_,p/core_,$(STATIC_OBJ)))

 include $(TOP)/shlr/gdb/deps.mk
-ifeq ($(USE_TREESITTER),1)
 include $(TOP)/shlr/radare2-shell-parser-deps.mk
-endif
 include $(LTOP)/rules.mk

 # include plugins
--- a/libr/core/cmd.c
+++ b/libr/core/cmd.c
@ -27,20 +27,17 @@
 #include <sys/utsname.h>
 #endif

+#include <tree_sitter/api.h>
+TSLanguage *tree_sitter_r2cmd ();
+
 // NOTE: this should be in sync with SPECIAL_CHARACTERS in
 //       radare2-shell-parser grammar, except for ", ' and
 //       whitespaces, because we let cmd_substitution_arg create
 //       new arguments
 static const char *SPECIAL_CHARS_REGULAR = "@;~$#|`\"'()<>";
-
-#if USE_TREESITTER
 static const char *SPECIAL_CHARS_PF = "@;~$#|`\"'<>";
-#include <tree_sitter/api.h>
-TSLanguage *tree_sitter_r2cmd ();
-
 static const char *SPECIAL_CHARS_DOUBLE_QUOTED = "\"";
 static const char *SPECIAL_CHARS_SINGLE_QUOTED = "'";
-#endif

 R_API void r_save_panels_layout(RCore *core, const char *_name);
 R_API bool r_load_panels_layout(RCore *core, const char *_name);
@ -4542,8 +4539,6 @@ out_finish:

 static int run_cmd_depth(RCore *core, char *cmd);

-#if USE_TREESITTER
-
 struct tsr2cmd_state {
 	TSParser *parser;
 	RCore *core;
@ -5085,6 +5080,7 @@ DEFINE_HANDLE_TS_FCN(redirect_command) {

 	TSNode redirect_op = ts_node_child_by_field_name (node, "redirect_operator", strlen ("redirect_operator"));
 	if (is_ts_fdn_redirect_operator (redirect_op)) {
+		// this is the default operation, no html and no append
 	} else if (is_ts_fdn_append_operator (redirect_op)) {
 		is_append = true;
 	} else if (is_ts_html_redirect_operator (redirect_op)) {
@ -5405,7 +5401,7 @@ DEFINE_HANDLE_TS_FCN(tmp_reli_command) {
 	ut64 orig_offset = state->core->offset;
 	ut64 addr = r_num_math (core->num, arg_str);
 	if (addr) {
-		r_core_cmdf (core, "so %d", addr);
+		r_core_cmdf (core, "so %" PFMT64d, addr);
 	}
 	bool res = handle_ts_command_tmpseek (state, command);
 	r_core_seek (state->core, orig_offset, true);
@ -5435,7 +5431,7 @@ DEFINE_HANDLE_TS_FCN(tmp_fd_command) {
 	TSNode command = ts_node_named_child (node, 0);
 	TSNode arg = ts_node_named_child (node, 1);
 	char *arg_str = ts_node_handle_arg (state, node, arg, 1);
-	int tmpfd = core->io->desc ? core->io->desc->fd : -1;
+	int tmpfd = core->io->desc? core->io->desc->fd: -1;
 	r_io_use_fd (core->io, atoi (arg_str));
 	bool res = handle_ts_command (state, command);
 	r_io_use_fd (core->io, tmpfd);
@ -6522,7 +6518,7 @@ DEFINE_HANDLE_TS_FCN(commands) {
 #define HANDLER_RULE_OP(name) { #name, handle_ts_##name },
 #define RULE_OP(name)

-struct ts_data_symbol_map map[] = {
+struct ts_data_symbol_map map_ts_command_handlers[] = {
 	#include "r2-shell-parser-cmds.inc"
 	{ NULL, NULL },
 };
@ -6530,7 +6526,7 @@ struct ts_data_symbol_map map[] = {
 #define RULE_OP(name) { #name, &ts_##name##_symbol },
 #define HANDLER_RULE_OP(name) RULE_OP(name)

-struct ts_data_symbol_map map_symbols[] = {
+struct ts_data_symbol_map map_ts_symbols[] = {
 	#include "r2-shell-parser-cmds.inc"
 	{ NULL, NULL },
 };
@ -6542,14 +6538,14 @@ static void ts_symbols_init(RCmd *cmd) {
 	TSLanguage *lang = tree_sitter_r2cmd ();
 	cmd->language = lang;
 	cmd->ts_symbols_ht = ht_up_new0 ();
-	struct ts_data_symbol_map *entry = map;
+	struct ts_data_symbol_map *entry = map_ts_command_handlers;
 	while (entry->name) {
 		TSSymbol symbol = ts_language_symbol_for_name (lang, entry->name, strlen (entry->name), true);
 		ht_up_insert (cmd->ts_symbols_ht, symbol, entry->data);
 		entry++;
 	}

-	entry = map_symbols;
+	entry = map_ts_symbols;
 	while (entry->name) {
 		TSSymbol *sym_ptr = entry->data;
 		*sym_ptr = ts_language_symbol_for_name (lang, entry->name, strlen (entry->name), true);
@ -6608,7 +6604,6 @@ static bool core_cmd_tsr2cmd(RCore *core, const char *cstr, bool split_lines, bo
 	core->cons->context->cmd_depth++;
 	return res;
 }
-#endif

 static int run_cmd_depth(RCore *core, char *cmd) {
 	char *rcmd;
@ -6640,11 +6635,7 @@ static int run_cmd_depth(RCore *core, char *cmd) {

 R_API int r_core_cmd(RCore *core, const char *cstr, int log) {
 	if (core->use_tree_sitter_r2cmd) {
-#if USE_TREESITTER
 		return core_cmd_tsr2cmd (core, cstr, false, log)? 0: 1;
-#else
-		R_LOG_WARN ("No compilation support for radare2-shell-parser\n");
-#endif
 	}

 	int ret = false, i;
--- a/libr/core/cmd_api.c
+++ b/libr/core/cmd_api.c
@ -41,9 +41,7 @@ R_API RCmd *r_cmd_free(RCmd *cmd) {
 	if (!cmd) {
 		return NULL;
 	}
-#if USE_TREESITTER
 	ht_up_free (cmd->ts_symbols_ht);
-#endif
 	r_cmd_alias_free (cmd);
 	r_cmd_macro_fini (&cmd->macro);
 	// dinitialize plugin commands
--- a/libr/include/r_cmd.h
+++ b/libr/include/r_cmd.h
@ -76,10 +76,8 @@ typedef struct r_cmd_t {
 	RList *lcmds;
 	RList *plist;
 	RCmdAlias aliases;
-#if USE_TREESITTER
 	void *language; // used to store TSLanguage *
 	HtUP *ts_symbols_ht;
-#endif
 } RCmd;

 // TODO WIP
--- a/libr/include/r_userconf.h.acr
+++ b/libr/include/r_userconf.h.acr
@ -98,8 +98,6 @@
 #define USE_PTRACE_WRAP @USE_PTRACE_WRAP@
 #define HAVE_FORK @HAVE_FORK@

-#define USE_TREESITTER @USE_TREESITTER@
-
 #define WITH_GPL @WITH_GPL@

 #if __APPLE__ && __POWERPC__
--- a/meson.build
+++ b/meson.build
@ -343,7 +343,6 @@ userconf.set10('HAVE_FORK', true)
 userconf.set10('HAVE_PTRACE', have_ptrace)
 userconf.set10('USE_PTRACE_WRAP', use_ptrace_wrap)
 userconf.set10('WITH_GPL', true)
-userconf.set10('USE_TREESITTER', get_option('use_treesitter'))
 ok = cc.has_header_symbol('sys/personality.h', 'ADDR_NO_RANDOMIZE')
 userconf.set10('HAVE_DECL_ADDR_NO_RANDOMIZE', ok)

--- a/meson_options.txt
+++ b/meson_options.txt
@ -27,13 +27,11 @@ option('use_sys_lz4', type: 'boolean', value: false)
 option('use_sys_xxhash', type: 'boolean', value: false)
 option('use_sys_openssl', type: 'boolean', value: false)
 option('use_libuv', type: 'boolean', value: true)
-option('use_treesitter', type: 'boolean', value: false)
 option('debugger', type: 'boolean', value: true)

 option('use_webui', type: 'boolean', value: false, description: 'install different WebUIs for radare2')

-option('shell_parser_in_builddir', type: 'boolean', value: true, description: 'When true, radare2-shell-parser is downloaded in the build directory')
-option('tree_sitter_in_builddir', type: 'boolean', value: true, description: 'When true, tree-sitter is downloaded in the build directory')
-
 option('enable_tests', type: 'boolean', value: false, description: 'Build unit tests in test/unit')
-option('enable_r2r', type: 'boolean', value: true, description: 'Build r2r executable for regression testing')
+option('enable_r2r', type: 'boolean', value: true, description: 'Build r2r executable for regression testing')
+
+option('tree-sitter-sync', type: 'boolean', value: false, description: 'Force a sync of shlr/tree-sitter before building')
--- a/shlr/Makefile
+++ b/shlr/Makefile
@ -31,11 +31,6 @@ TS_URL=https://github.com/tree-sitter/tree-sitter.git
 TS_BRA=master
 TS_TIP=f049ba350f3f6019ce9a1cbb0975ebd154ef7ad3

-# NOTE: when you update SHELLPARSER_TIP or SHELLPARSER_BRA, also update them in shlr/meson.build
-SHELLPARSER_URL=https://github.com/ret2libc/radare2-shell-parser.git
-SHELLPARSER_BRA=master
-SHELLPARSER_TIP=3d82cad9d865cb6e65364f66f038d1d1d4d8818a
-
 ifeq ($(CS_RELEASE),1)
 CS_VER=4.0.1
 CS_TAR=https://codeload.github.com/aquynh/capstone/tar.gz/$(CS_VER)
@ -65,7 +60,7 @@ CS_REV=
 CS_PATCHES=1
 endif

-.PHONY: capstone-sync capstone-build all clean mrproper libgdbr libwindbg bochs tree-sitter-sync radare2-shell-parser-sync
+.PHONY: capstone-sync capstone-build all clean mrproper libgdbr libwindbg bochs tree-sitter-sync

 HOST_CC?=gcc
 SHLR?=$(shell pwd)
@ -329,37 +324,37 @@ else
 	cd ../../radare2-webui/www/m && git pull ; npm i ; $(MAKE) release
 endif

-ifeq ($(USE_TREESITTER),1)
 tree-sitter-build: tree-sitter/libtree-sitter.$(EXT_AR)

 tree-sitter/libtree-sitter.$(EXT_AR): tree-sitter/lib/src/lib.o
 	$(AR) rvs $@ $<
 	$(RANLIB) $@

-tree-sitter/lib/src/lib.o: tree-sitter-sync
+tree-sitter/lib/src/lib.o: tree-sitter/lib/src/lib.c # rebuild the object whenever lib.c changes
 	$(CC) -std=c99 -c tree-sitter/lib/src/lib.c -o $@ -Itree-sitter/lib/include -Itree-sitter/lib/src $(CFLAGS)

 tree-sitter-sync:
-	"$(SHELL)" clone_3rd_repo.sh tree-sitter "${TS_URL}" "${TS_BRA}" "${TS_TIP}"
+	rm -rf tree-sitter tree-sitter.vc
+	"$(SHELL)" clone_3rd_repo.sh tree-sitter.vc "${TS_URL}" "${TS_BRA}" "${TS_TIP}"
+	mkdir -p tree-sitter/lib
+	cp -rf ./tree-sitter.vc/lib/src ./tree-sitter/lib
+	cp -rf ./tree-sitter.vc/lib/include ./tree-sitter/lib
+	rm -rf tree-sitter.vc

 radare2-shell-parser-build: radare2-shell-parser/libshell-parser.$(EXT_AR)

-radare2-shell-parser/libshell-parser.$(EXT_AR): radare2-shell-parser/src/parser.o
-	$(AR) rvs $@ $<
+radare2-shell-parser/libshell-parser.$(EXT_AR): radare2-shell-parser/src/parser.o radare2-shell-parser/src/scanner.o
+	$(AR) rvs $@ radare2-shell-parser/src/parser.o radare2-shell-parser/src/scanner.o
 	$(RANLIB) $@

-radare2-shell-parser/src/parser.o: radare2-shell-parser-sync
+radare2-shell-parser/src/parser.o: radare2-shell-parser/src/parser.c
 	$(CC) -c radare2-shell-parser/src/parser.c -o $@ -Iradare2-shell-parser/src/tree_sitter -Itree-sitter/lib/include $(CFLAGS)

-radare2-shell-parser-sync: tree-sitter-sync
-	"$(SHELL)" clone_3rd_repo.sh radare2-shell-parser "${SHELLPARSER_URL}" "${SHELLPARSER_BRA}" "${SHELLPARSER_TIP}"
+radare2-shell-parser/src/scanner.o: radare2-shell-parser/src/scanner.c
+	$(CC) -c radare2-shell-parser/src/scanner.c -o $@ -Iradare2-shell-parser/src/tree_sitter -Itree-sitter/lib/include $(CFLAGS)

 SHLRS+=tree-sitter/libtree-sitter.a
 SHLRS+=radare2-shell-parser/libshell-parser.a
-else
-tree-sitter-build:
-radare2-shell-parser-build:
-endif

 www-sync-m sync-www-m: ../../radare2-webui/dist/m
 	cp -rf ../../radare2-webui/dist/m www/m.tmp
--- a/shlr/meson.build
+++ b/shlr/meson.build
@ -232,109 +232,79 @@ sdb_gen_cmd = [
  '@INPUT@'
 ]

-
-if get_option('use_treesitter')
-  # handle tree-sitter dependency
-  if get_option('tree_sitter_in_builddir')
-    tree_sitter_path = join_paths(meson.current_build_dir(), 'tree-sitter')
-  else
-    tree_sitter_path = join_paths(meson.current_source_dir(), 'tree-sitter')
-  endif
-  res = run_command(py3_exe, '-c', '__import__("sys").exit(__import__("os").path.exists("@0@"))'.format(tree_sitter_path))
-  if res.returncode() == 0
-    if not git_exe.found()
-      error('Cannot load tree-sitter library. Either provide tree-sitter in ./shlr/tree-sitter or install git, so it can be downloaded')
-    endif
-
-    # NOTE: when you update TS_TIP or TS_BRA, also update them in shlr/Makefile
-    TS_TIP = 'f049ba350f3f6019ce9a1cbb0975ebd154ef7ad3'
-    TS_BRA = 'master'
-
-    message('Cloning tree-sitter ' + TS_BRA + ' branch, commit ' + TS_TIP + ', into ' + tree_sitter_path)
-    git_cmd = 'clone -b @0@ https://github.com/tree-sitter/tree-sitter.git @1@'.format(TS_BRA, tree_sitter_path)
-    clone_cmd = run_command(git_exe, git_cmd.split())
-    if clone_cmd.returncode() != 0
-      error('Cannot execute git clone command')
-    endif
-
-    reset_cmd_str = '-C @0@ reset --hard @1@'.format(tree_sitter_path, TS_TIP)
-    reset_cmd = run_command(git_exe, reset_cmd_str.split())
-    if reset_cmd.returncode() != 0
-      error('Cannot execute git reset command')
-    endif
+# handle tree-sitter
+tree_sitter_path = join_paths(meson.current_source_dir(), 'tree-sitter')
+tree_sitter_vc_path = join_paths(meson.current_source_dir(), 'tree-sitter.vc')
+if get_option('tree-sitter-sync')
+  if not git_exe.found()
+    error('Cannot sync tree-sitter library. Either provide tree-sitter in ./shlr/tree-sitter or install git, so it can be downloaded')
  endif

-  tree_sitter_files = [
-    join_paths(tree_sitter_path, 'lib/src/lib.c'),
-  ]
+  # NOTE: when you update TS_TIP or TS_BRA, also update them in shlr/Makefile
+  TS_TIP = 'f049ba350f3f6019ce9a1cbb0975ebd154ef7ad3'
+  TS_BRA = 'master'

-  tree_sitter_inc = [platform_inc, include_directories('tree-sitter/lib/src'), include_directories('tree-sitter/lib/include')]
-
-  libtree_sitter = static_library('tree_sitter', tree_sitter_files,
-    include_directories: tree_sitter_inc,
-    implicit_include_directories: false,
-    c_args: ['-std=c99']
-  )
-
-  tree_sitter_dep = declare_dependency(
-    link_with: libtree_sitter,
-    include_directories: tree_sitter_inc
-  )
-
-
-  # handle radare2-shell-parser dependency
-  if get_option('shell_parser_in_builddir')
-    shell_parser_path = join_paths(meson.current_build_dir(), 'radare2-shell-parser')
-  else
-    shell_parser_path = join_paths(meson.current_source_dir(), 'radare2-shell-parser')
-  endif
-  res = run_command(py3_exe, '-c', '__import__("sys").exit(__import__("os").path.exists("@0@"))'.format(shell_parser_path))
-  if res.returncode() == 0
-    if not git_exe.found()
-      error('Cannot load radare2-shell-parser library. Either provide radare2-shell-parser in ./shlr/radare2-shell-parser or install git, so it can be downloaded')
-    endif
-
-    # NOTE: when you update SHELLPARSER_TIP or SHELLPARSER_BRA, also update them in shlr/Makefile
-    SHELLPARSER_TIP = '3d82cad9d865cb6e65364f66f038d1d1d4d8818a'
-    SHELLPARSER_BRA = 'master'
-    shell_parser_user = 'ret2libc'
-
-    message('Cloning radare2-shell-parser ' + SHELLPARSER_BRA + ' branch, commit ' + SHELLPARSER_TIP + ', into ' + shell_parser_path)
-    git_cmd = 'clone -b @0@ https://github.com/@1@/radare2-shell-parser.git @2@'.format(SHELLPARSER_BRA, shell_parser_user, shell_parser_path)
-    clone_cmd = run_command(git_exe, git_cmd.split())
-    if clone_cmd.returncode() != 0
-      error('Cannot execute git clone command')
-    endif
-
-    reset_cmd_str = '-C @0@ reset --hard @1@'.format(shell_parser_path, SHELLPARSER_TIP)
-    reset_cmd = run_command(git_exe, reset_cmd_str.split())
-    if reset_cmd.returncode() != 0
-      error('Cannot execute git reset command')
-    endif
+  message('Deleting existing directories @0@ and @1@'.format(tree_sitter_vc_path, tree_sitter_path))
+  res = run_command('rm', '-rf @0@ @1@'.format(tree_sitter_vc_path, tree_sitter_path).split())
+  message('Cloning tree-sitter ' + TS_BRA + ' branch, commit ' + TS_TIP + ', into ' + tree_sitter_vc_path)
+  git_cmd = 'clone -b @0@ https://github.com/tree-sitter/tree-sitter.git @1@'.format(TS_BRA, tree_sitter_vc_path)
+  clone_cmd = run_command(git_exe, git_cmd.split())
+  if clone_cmd.returncode() != 0
+    error('Cannot execute git clone command')
  endif

-  shell_parser_files = [
-    join_paths(shell_parser_path, 'src/parser.c'),
-    join_paths(shell_parser_path, 'src/scanner.c'),
-  ]
+  reset_cmd_str = '-C @0@ reset --hard @1@'.format(tree_sitter_vc_path, TS_TIP)
+  reset_cmd = run_command(git_exe, reset_cmd_str.split())
+  if reset_cmd.returncode() != 0
+    error('Cannot execute git reset command')
+  endif

-  shell_parser_inc = [platform_inc, include_directories('radare2-shell-parser/src/tree_sitter')]
-
-  libshell_parser = static_library('shell_parser', shell_parser_files,
-    include_directories: shell_parser_inc + tree_sitter_inc,
-    implicit_include_directories: true
-  )
-
-  shell_parser_dep = declare_dependency(
-    link_with: libshell_parser,
-    include_directories: shell_parser_inc,
-    dependencies: tree_sitter_dep
-  )
-else
-  shell_parser_dep = []
-  tree_sitter_dep = []
+  message('Copying files from @0@ to @1@'.format(tree_sitter_vc_path, tree_sitter_path))
+  res = run_command('mkdir',  '-p @0@/lib'.format(tree_sitter_path).split())
+  res = run_command('cp',  '-r @0@/lib/src @1@/lib'.format(tree_sitter_vc_path, tree_sitter_path).split())
+  res = run_command('cp',  '-r @0@/lib/include @1@/lib'.format(tree_sitter_vc_path, tree_sitter_path).split())
+  message('Deleting @0@'.format(tree_sitter_vc_path))
+  res = run_command('rm', '-rf @0@'.format(tree_sitter_vc_path).split())
 endif

+tree_sitter_files = [
+  join_paths(tree_sitter_path, 'lib/src/lib.c'),
+]
+
+tree_sitter_inc = [platform_inc, include_directories('tree-sitter/lib/src'), include_directories('tree-sitter/lib/include')]
+
+libtree_sitter = static_library('tree_sitter', tree_sitter_files,
+  include_directories: tree_sitter_inc,
+  implicit_include_directories: false,
+  c_args: ['-std=c99']
+)
+
+tree_sitter_dep = declare_dependency(
+  link_with: libtree_sitter,
+  include_directories: tree_sitter_inc
+)
+
+
+# new radare2 shell parser
+shell_parser_path = join_paths(meson.current_source_dir(), 'radare2-shell-parser')
+shell_parser_files = [
+  join_paths(shell_parser_path, 'src/parser.c'),
+  join_paths(shell_parser_path, 'src/scanner.c'),
+]
+
+shell_parser_inc = [platform_inc, include_directories('radare2-shell-parser/src/tree_sitter')]
+
+libshell_parser = static_library('shell_parser', shell_parser_files,
+  include_directories: shell_parser_inc + tree_sitter_inc,
+  implicit_include_directories: true
+)
+
+shell_parser_dep = declare_dependency(
+  link_with: libshell_parser,
+  include_directories: shell_parser_inc,
+  dependencies: tree_sitter_dep
+)
+

 # handle bochs dependency
 bochs_files = [
--- a/shlr/radare2-shell-parser/.gitignore
+++ b/shlr/radare2-shell-parser/.gitignore
@ -0,0 +1 @@
+node_modules
--- a/shlr/radare2-shell-parser/README.md
+++ b/shlr/radare2-shell-parser/README.md
@ -0,0 +1,26 @@
+# radare2-shell-parser
+
+This is the parser for the radare2 shell language.
+See https://tree-sitter.github.io/tree-sitter/creating-parsers for more info on
+how to create a parser with tree-sitter.
+
+## Sources
+- grammar.js: defines the basic grammar
+- src/scanner.c: external scanner used to scan some tokens that cannot be parsed
+  with the regular js grammar, as that includes only the context-free part of
+  the language.
+- src/parser.c: this file is auto-generated by tree-sitter based on grammar.js
+- corpus/\*: list of test files used to ensure the grammar works well
+
+## How to update the grammar
+When you update something in grammar.js or src/scanner.c you have to re-generate
+the parser. The process works as follows:
+
+1. Make the needed changes to grammar.js and/or src/scanner.c
+2. Install npm dependencies with: `cd shlr/radare2-shell-parser ; npm install`
+3. Make sure tree-sitter is in PATH: `export PATH=$PATH:./node_modules/.bin`
+4. Re-generate the parser files: `tree-sitter generate`
+5. Check tests still pass: `tree-sitter test`. Use `tree-sitter parse
+   ./example-file` if you want to see the syntax tree of a custom input provided
+   in `./example-file`
+6. Commit auto-generated files and modified grammar.js and src/scanner.c into git.
--- a/shlr/radare2-shell-parser/binding.gyp
+++ b/shlr/radare2-shell-parser/binding.gyp
@ -0,0 +1,19 @@
+# node-gyp build description for the Node.js binding of the r2cmd parser.
+{
+  "targets": [
+    {
+      "target_name": "tree_sitter_r2cmd_binding",
+      "include_dirs": [
+        # Command expansion: runs node so that requiring 'nan' reports the
+        # directory holding its headers.
+        "<!(node -e \"require('nan')\")",
+        "src"
+      ],
+      "sources": [
+        "src/parser.c",
+        "src/scanner.c",
+        "src/binding.cc"
+      ],
+      # One compiler option per list element, so the result does not depend on
+      # the generator passing a multi-option string through unquoted.
+      "cflags_c": [
+        "-std=c99",
+        "-ggdb",
+        "-O0"
+      ]
+    }
+  ]
+}
--- a/shlr/radare2-shell-parser/corpus/cmd_substitution.txt
+++ b/shlr/radare2-shell-parser/corpus/cmd_substitution.txt
@ -0,0 +1,67 @@
+=======================================
+Command substitution used as simple arg $(
+=======================================
+
+?e $(p8 10)
+
+---
+
+(commands
+  (arged_command command: (cmd_identifier)
+    args: (args
+            (arg (cmd_substitution_arg
+	      (arged_command command: (cmd_identifier)
+	        args: (args (arg (arg_identifier)))))))))
+
+=======================================
+Command substitution with multiple commands
+=======================================
+
+?e $(p8 10; p8 4 @ 0xdeadbeef)
+
+---
+
+(commands
+  (arged_command command: (cmd_identifier)
+    args: (args
+            (arg (cmd_substitution_arg
+	      (arged_command command: (cmd_identifier)
+	        args: (args (arg (arg_identifier))))
+	      (tmp_seek_command
+	        (arged_command command: (cmd_identifier)
+	          args: (args (arg (arg_identifier))))
+		(arg (arg_identifier))))))))
+
+
+
+=======================================
+Command substitution used as simple arg `
+=======================================
+
+?e `p8 10`
+
+---
+
+(commands
+  (arged_command command: (cmd_identifier)
+    args: (args
+            (arg (cmd_substitution_arg
+	      (arged_command command: (cmd_identifier)
+	        args: (args (arg (arg_identifier)))))))))
+
+
+=======================================
+Nested command substitution
+=======================================
+
+?e $(p8 $(?e 10))
+
+---
+
+(commands
+  (arged_command command: (cmd_identifier)
+    args: (args (arg (cmd_substitution_arg
+	         (arged_command command: (cmd_identifier)
+	           args: (args (arg (cmd_substitution_arg
+		           (arged_command command: (cmd_identifier)
+			     args: (args (arg (arg_identifier)))))))))))))
--- a/shlr/radare2-shell-parser/corpus/comments.txt
+++ b/shlr/radare2-shell-parser/corpus/comments.txt
@ -0,0 +1,85 @@
+=======================
+One command and comment
+=======================
+
+afl # af is not going to be parsed
+
+---
+
+(commands
+  (arged_command (cmd_identifier)))
+
+
+==================
+Begin with comment
+==================
+
+# this is just a comment
+#    ~?
+# boh
+
+---
+
+(commands)
+
+
+====================================
+Multiple commands multiple comments
+====================================
+
+afl # first comment
+p8 10 # second comment
+p8 10# third comment
+
+---
+
+(commands
+  (arged_command (cmd_identifier))
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier))))
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier)))))
+
+
+===========================
+Multiline comment on a line
+===========================
+
+p8 /* inline comment */ 3
+afl
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier))))
+  (arged_command (cmd_identifier)))
+
+
+=====================================
+Multiline comment on multiple lines
+=====================================
+
+p8 /* multiline
+comment */ 3
+afl
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier))))
+  (arged_command (cmd_identifier)))
+
+
+==================================
+Comment with # in the middle line
+==================================
+
+p8 4 # something
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier)))))
--- a/shlr/radare2-shell-parser/corpus/escape_args.txt
+++ b/shlr/radare2-shell-parser/corpus/escape_args.txt
@ -0,0 +1,40 @@
+============================
+Escape special command chars
+============================
+
+pd 10\@test\>name
+
+---
+
+(commands
+  (arged_command
+    command: (cmd_identifier)
+    args: (args (arg (arg_identifier)))))
+
+
+====================
+Use newlines in echo
+====================
+
+?e "Hello\nWorld"
+
+---
+
+(commands
+  (arged_command
+    command: (cmd_identifier)
+    args: (args (arg (double_quoted_arg)))))
+
+
+===========
+Escape hash
+===========
+
+?e Hello\#World
+
+---
+
+(commands
+  (arged_command
+    command: (cmd_identifier)
+    args: (args (arg (arg_identifier)))))
--- a/shlr/radare2-shell-parser/corpus/foreach3.txt
+++ b/shlr/radare2-shell-parser/corpus/foreach3.txt
@ -0,0 +1,221 @@
+=================
+Foreach addr+size
+=================
+
+pd @@@=0xdeadbeef 10
+pd @@@=0xdeadbeef 10 $$ 20
+
+---
+
+(commands
+  (foreach_addrsize_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))
+    (arg (arg_identifier)))
+  (foreach_addrsize_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))
+    (arg (arg_identifier))
+    (arg (arg_identifier))
+    (arg (arg_identifier))))
+
+
+===================
+Foreach basic block
+===================
+
+pd @@@b
+
+---
+
+(commands
+  (foreach_bb_command
+    (arged_command (cmd_identifier))))
+
+
+===============
+Foreach command
+===============
+
+pd @@@c:?e hello
+
+---
+
+(commands
+  (foreach_cmd_command
+    (arged_command (cmd_identifier))
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))))
+
+
+===============
+Foreach comment
+===============
+
+pd @@@C:comment
+
+---
+
+(commands
+  (foreach_comment_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))))
+
+
+==============
+Foreach import
+==============
+
+pd @@@i
+
+---
+
+(commands
+  (foreach_import_command
+    (arged_command (cmd_identifier))))
+
+
+================
+Foreach register
+================
+
+pd @@@r
+
+---
+
+(commands
+  (foreach_register_command
+    (arged_command (cmd_identifier))))
+
+
+==============
+Foreach symbol
+==============
+
+pd @@@s
+
+---
+
+(commands
+  (foreach_symbol_command
+    (arged_command (cmd_identifier))))
+
+
+==============
+Foreach string
+==============
+
+pd @@@st
+
+---
+
+(commands
+  (foreach_string_command
+    (arged_command (cmd_identifier))))
+
+
+===============
+Foreach section
+===============
+
+pd @@@S
+
+---
+
+(commands
+  (foreach_section_command
+    (arged_command (cmd_identifier))))
+
+
+===============
+Foreach io.maps
+===============
+
+pd @@@m
+
+---
+
+(commands
+  (foreach_iomap_command
+    (arged_command (cmd_identifier))))
+
+
+===============
+Foreach dbg.map
+===============
+
+pd @@@M
+
+---
+
+(commands
+  (foreach_dbgmap_command
+    (arged_command (cmd_identifier))))
+
+
+============
+Foreach flag
+============
+
+pd @@@f
+
+---
+
+(commands
+  (foreach_flag_command
+    (arged_command (cmd_identifier))))
+
+
+==================
+Foreach flag match
+==================
+
+pd @@@f:hit*
+
+---
+
+(commands
+  (foreach_flag_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))))
+
+
+================
+Foreach function
+================
+
+pd @@@F
+
+---
+
+(commands
+  (foreach_function_command
+    (arged_command (cmd_identifier))))
+
+
+================
+Foreach function match
+================
+
+pd @@@F:hit*
+
+---
+
+
+(commands
+  (foreach_function_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))))
+
+
+==============
+Foreach thread
+==============
+
+pd @@@t
+
+---
+
+(commands
+  (foreach_thread_command
+    (arged_command (cmd_identifier))))
--- a/shlr/radare2-shell-parser/corpus/grep_commands.txt
+++ b/shlr/radare2-shell-parser/corpus/grep_commands.txt
@ -0,0 +1,60 @@
+================
+Very simple grep
+================
+
+pd 10 ~ mov
+pd 10~mov
+afl~$
+
+---
+
+(commands
+  (grep_command
+    command: (arged_command
+    	       command: (cmd_identifier)
+	       args: (args (arg (arg_identifier))))
+    specifier: (grep_specifier (grep_specifier_identifier)))
+  (grep_command
+    command: (arged_command
+    	       command: (cmd_identifier)
+	       args: (args (arg (arg_identifier))))
+    specifier: (grep_specifier (grep_specifier_identifier)))
+  (grep_command
+    command: (arged_command command: (cmd_identifier))
+    specifier: (grep_specifier (grep_specifier_identifier))))
+
+
+=======================================
+Grep with cmd substitution as specifier
+=======================================
+
+pd 10~`?e mov`
+pd 10~mo`?e v`
+pd 10~mo$(?e v)
+
+---
+
+(commands
+  (grep_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (grep_specifier
+      (cmd_substitution_arg
+        (arged_command (cmd_identifier)
+	  (args (arg (arg_identifier)))))))
+  (grep_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (grep_specifier
+      (grep_specifier_identifier)
+      (cmd_substitution_arg
+        (arged_command (cmd_identifier)
+	  (args (arg (arg_identifier)))))))
+  (grep_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (grep_specifier
+      (grep_specifier_identifier)
+      (cmd_substitution_arg
+        (arged_command (cmd_identifier)
+	  (args (arg (arg_identifier))))))))
--- a/shlr/radare2-shell-parser/corpus/iter_commands.txt
+++ b/shlr/radare2-shell-parser/corpus/iter_commands.txt
@ -0,0 +1,205 @@
+============
+Iter flags
+============
+
+p8 4 @@ sym.*
+
+---
+
+(commands
+  (iter_flags_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (arg (arg_identifier))))
+
+========
+Iter dbt
+========
+
+p8 4 @@dbt
+p8 4 @@dbta
+p8 4 @@dbtb
+p8 4 @@dbts
+
+---
+
+(commands
+  (iter_dbta_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier)))))
+  (iter_dbta_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier)))))
+  (iter_dbtb_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier)))))
+  (iter_dbts_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))))
+
+===============
+Iter file lines
+===============
+
+p8 @@.file
+
+---
+
+(commands
+  (iter_file_lines_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))))
+
+===============
+Iter offsets
+===============
+
+p8 @@=off1 off2
+
+---
+
+(commands
+  (iter_offsets_command
+    (arged_command (cmd_identifier))
+    (args (arg (arg_identifier)) (arg (arg_identifier)))))
+
+
+===============
+Iter sdbquery
+===============
+
+p8 @@k sdbquery
+
+---
+
+(commands
+  (iter_sdbquery_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))))
+
+
+===============
+Iter threads
+===============
+
+p8 @@t
+
+---
+
+(commands
+  (iter_threads_command
+    (arged_command (cmd_identifier))))
+
+
+===============
+Iter basic blocks
+===============
+
+p8 @@b
+
+---
+
+(commands
+  (iter_bbs_command
+    (arged_command (cmd_identifier))))
+
+
+===============
+Iter instructions
+===============
+
+p8 @@i
+
+---
+
+(commands
+  (iter_instrs_command
+    (arged_command (cmd_identifier))))
+
+
+===============
+Iter sections
+===============
+
+p8 @@iS
+
+---
+
+(commands
+  (iter_sections_command
+    (arged_command (cmd_identifier))))
+
+
+===============
+Iter functions
+===============
+
+p8 @@f
+
+---
+
+(commands
+  (iter_functions_command
+    (arged_command (cmd_identifier))))
+
+
+===============
+Iter function matching
+===============
+
+p8 @@f:write
+
+---
+
+(commands
+  (iter_functions_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))))
+
+
+===============
+Iter by step
+===============
+
+p8 @@s:0xd000 0xe000 0x100
+
+---
+
+(commands
+  (iter_step_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))
+    (arg (arg_identifier))
+    (arg (arg_identifier))))
+
+
+===============
+Iter command
+===============
+
+p8 @@c:/x 9090
+
+---
+
+(commands
+  (iter_interpret_command
+    (arged_command (cmd_identifier))
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))))
+
+
+================
+Iter search hits
+================
+
+pd 2 @@/x 9090
+
+---
+
+(commands
+  (iter_hit_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))))
+
--- a/shlr/radare2-shell-parser/corpus/pf_commands.txt
+++ b/shlr/radare2-shell-parser/corpus/pf_commands.txt
@ -0,0 +1,230 @@
+==========================
+Show data with format_name
+==========================
+
+pf fmt_name
+pf* fmt_name
+pfc fmt_name
+pfj fmt_name
+pfq fmt_name
+pfs fmt_name
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (pf_args (pf_arg (pf_arg_identifier))))
+  (arged_command (cmd_identifier)
+    (pf_args (pf_arg (pf_arg_identifier))))
+  (arged_command (cmd_identifier)
+    (pf_args (pf_arg (pf_arg_identifier))))
+  (arged_command (cmd_identifier)
+    (pf_args (pf_arg (pf_arg_identifier))))
+  (arged_command (cmd_identifier)
+    (pf_args (pf_arg (pf_arg_identifier))))
+  (arged_command (cmd_identifier)
+    (pf_args (pf_arg (pf_arg_identifier)))))
+
+
+================================
+Show data with format definition (simple)
+================================
+
+pf 3xi foo bar
+pf* 3xi foo bar
+pfc 3xi foo bar
+pfj 3xi foo bar
+pfq 3xi foo bar
+pfs 3xi foo bar
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (pf_args
+      (pf_arg (pf_arg_identifier))
+      (pf_arg (pf_arg_identifier))
+      (pf_arg (pf_arg_identifier))))
+  (arged_command (cmd_identifier)
+    (pf_args
+      (pf_arg (pf_arg_identifier))
+      (pf_arg (pf_arg_identifier))
+      (pf_arg (pf_arg_identifier))))
+  (arged_command (cmd_identifier)
+    (pf_args
+      (pf_arg (pf_arg_identifier))
+      (pf_arg (pf_arg_identifier))
+      (pf_arg (pf_arg_identifier))))
+  (arged_command (cmd_identifier)
+    (pf_args
+      (pf_arg (pf_arg_identifier))
+      (pf_arg (pf_arg_identifier))
+      (pf_arg (pf_arg_identifier))))
+  (arged_command (cmd_identifier)
+    (pf_args
+      (pf_arg (pf_arg_identifier))
+      (pf_arg (pf_arg_identifier))
+      (pf_arg (pf_arg_identifier))))
+  (arged_command (cmd_identifier)
+    (pf_args
+      (pf_arg (pf_arg_identifier))
+      (pf_arg (pf_arg_identifier))
+      (pf_arg (pf_arg_identifier)))))
+
+
+===============
+pf. format name
+===============
+
+pf.fmt_name
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (pf_dot_cmd_args (pf_args (pf_arg (pf_arg_identifier))))))
+
+
+=================
+pf. format fields
+=================
+
+pf.fmt_name.field_name
+pf.fmt_name.field_name=33
+pf.fmt_name.field_name[3]
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (pf_dot_cmd_args
+      (pf_args
+	(pf_arg (pf_arg_identifier))
+	(pf_arg (pf_arg_identifier)))))
+  (arged_command (cmd_identifier)
+    (pf_dot_cmd_args
+      (pf_args
+	(pf_arg (pf_arg_identifier))
+	(pf_arg (pf_arg_identifier)))
+      (pf_arg_identifier)
+      (pf_args (pf_arg (pf_arg_identifier)))))
+  (arged_command (cmd_identifier)
+    (pf_dot_cmd_args
+      (pf_args
+	(pf_arg (pf_arg_identifier))
+	(pf_arg (pf_arg_identifier))))))
+
+
+=================
+pfv format fields
+=================
+
+pfv.fmt_name
+pfv.fmt_name.field_name
+pfv.fmt_name.field_name=0xdeadbeef
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (pf_dot_cmd_args
+      (pf_args
+	(pf_arg (pf_arg_identifier)))))
+  (arged_command (cmd_identifier)
+    (pf_dot_cmd_args
+      (pf_args
+	(pf_arg (pf_arg_identifier))
+	(pf_arg (pf_arg_identifier)))))
+  (arged_command (cmd_identifier)
+    (pf_dot_cmd_args
+      (pf_args
+	(pf_arg (pf_arg_identifier))
+	(pf_arg (pf_arg_identifier)))
+      (pf_arg_identifier)
+      (pf_args (pf_arg (pf_arg_identifier))))))
+
+======================
+pf.fmt_name definition
+======================
+
+pf.obj xxdz prev next size name
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (pf_new_args
+      (pf_arg (pf_arg_identifier))
+      (pf_args
+	(pf_arg (pf_arg_identifier))
+	(pf_arg (pf_arg_identifier))
+	(pf_arg (pf_arg_identifier))
+	(pf_arg (pf_arg_identifier))
+	(pf_arg (pf_arg_identifier))))))
+
+
+=======
+List pf
+=======
+
+pf.
+
+---
+
+(commands
+  (arged_command (cmd_identifier)))
+
+=================
+Pf load from file
+=================
+
+pfo /tmp/myfile
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier)))))
+
+
+==========================
+pf format with parentheses
+==========================
+
+pf B (BitFldType)arg_name
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (pf_args
+      (pf_arg (pf_arg_identifier))
+      (pf_arg
+        (pf_concatenation
+	  (pf_arg_identifier)
+	  (pf_args (pf_arg (pf_arg_identifier)))
+	  (pf_arg_identifier)
+	  (pf_arg_identifier))))))
+
+
+==========
+Cf example
+==========
+
+Cf 64 [2]zwww e_magic e_cblp e_cp e_crlc e_cparhdr
+Cf-
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args
+      (arg (arg_identifier))
+      (pf_args
+	(pf_arg (pf_arg_identifier))
+	(pf_arg (pf_arg_identifier))
+	(pf_arg (pf_arg_identifier))
+	(pf_arg (pf_arg_identifier))
+	(pf_arg (pf_arg_identifier))
+	(pf_arg (pf_arg_identifier)))))
+  (arged_command (cmd_identifier)))
--- a/shlr/radare2-shell-parser/corpus/pipes.txt
+++ b/shlr/radare2-shell-parser/corpus/pipes.txt
@ -0,0 +1,94 @@
+======================
+Disable html and color
+======================
+
+p8 10 |
+
+---
+
+(commands
+  (html_disable_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))))
+
+======================
+Enable html and color
+======================
+
+p8 10 |H
+
+---
+
+(commands
+  (html_enable_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))))
+
+======================
+Use scr.tts
+======================
+
+p8 10 |T
+
+---
+
+(commands
+  (scr_tts_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))))
+
+======================
+Pipe to another command
+======================
+
+p8 10 | grep 10
+
+---
+
+(commands
+  (pipe_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (pipe_second_command)))
+
+===========
+Double pipe
+===========
+
+pd 10 | cat | grep mov
+
+---
+
+(commands
+  (pipe_command
+    (pipe_command
+      (arged_command (cmd_identifier)
+        (args (arg (arg_identifier))))
+      (pipe_second_command))
+    (pipe_second_command)))
+
+===================================
+Pipe to another command with no arg
+===================================
+
+f | grep main
+
+---
+
+(commands
+  (pipe_command
+    (arged_command (cmd_identifier))
+    (pipe_second_command)))
+
+=======================
+Pipe to interpreter "."
+=======================
+
+p8 10 |.
+
+---
+
+(commands
+  (arged_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))))
--- a/shlr/radare2-shell-parser/corpus/quoted_args.txt
+++ b/shlr/radare2-shell-parser/corpus/quoted_args.txt
@ -0,0 +1,53 @@
+==============================
+Echo with (double) quoted args
+==============================
+
+?e "This;is.one@string"
+
+---
+
+(commands
+  (arged_command command: (cmd_identifier)
+    args: (args (arg (double_quoted_arg)))))
+
+==============================
+Echo with (single) quoted args
+==============================
+
+?e 'This;is.one@string'
+
+---
+
+(commands
+  (arged_command command: (cmd_identifier)
+    args: (args (arg (single_quoted_arg)))))
+
+
+===========================
+Legacy command - all quoted
+===========================
+
+"?e This;is.one@string"
+
+---
+
+(commands
+  (legacy_quoted_command))
+
+
+=======================================
+Double quoted arg with cmd substitution
+=======================================
+
+?e "This is $(?e "a command")"
+
+---
+
+(commands
+  (arged_command command: (cmd_identifier)
+    args: (args
+             (arg (double_quoted_arg
+	       (cmd_substitution_arg
+	         (arged_command command: (cmd_identifier)
+		   args: (args
+		            (arg (double_quoted_arg))))))))))
--- a/shlr/radare2-shell-parser/corpus/redirects.txt
+++ b/shlr/radare2-shell-parser/corpus/redirects.txt
@ -0,0 +1,73 @@
+===============
+Redirect stdout
+===============
+
+afl > /tmp/test.txt
+
+---
+
+(commands
+  (redirect_command
+    (arged_command (cmd_identifier))
+    (fdn_redirect_operator)
+    (arg (arg_identifier))))
+
+
+===============
+Redirect stderr
+===============
+
+afl 2> /tmp/test.txt
+
+---
+
+(commands
+  (redirect_command
+    (arged_command (cmd_identifier))
+    (fdn_redirect_operator (file_descriptor))
+    (arg (arg_identifier))))
+
+
+=============
+Redirect HTML
+=============
+
+afl H> /tmp/test.txt
+
+---
+
+(commands
+  (redirect_command
+    (arged_command (cmd_identifier))
+    (html_redirect_operator)
+    (arg (arg_identifier))))
+
+
+===============
+Append to file
+===============
+
+afl >> /tmp/test.txt
+
+---
+
+(commands
+   (redirect_command
+    (arged_command (cmd_identifier))
+    (fdn_append_operator)
+    (arg (arg_identifier))))
+
+
+===============
+Append err to file
+===============
+
+afl 2>> /tmp/test.txt
+
+---
+
+(commands
+   (redirect_command
+     (arged_command (cmd_identifier))
+   (fdn_append_operator (file_descriptor))
+   (arg (arg_identifier))))
--- a/shlr/radare2-shell-parser/corpus/repeated_commands.txt
+++ b/shlr/radare2-shell-parser/corpus/repeated_commands.txt
@ -0,0 +1,63 @@
+=========================
+One digit repeat commands
+=========================
+
+7/x 90
+
+---
+
+(commands
+  (repeat_command
+    (number)
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))))
+
+
+=========================
+Multiple digits repeat commands
+=========================
+
+17/x 90
+
+---
+
+(commands
+  (repeat_command
+    (number)
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))))
+
+
+=======================
+Repeat with redirection
+=======================
+
+2p8 8 > /tmp/out.txt
+
+---
+
+(commands
+  (redirect_command
+    (repeat_command
+      (number)
+      (arged_command (cmd_identifier)
+	(args (arg (arg_identifier)))))
+    (fdn_redirect_operator)
+    (arg (arg_identifier))))
+
+
+====================
+Repeat with tmp seek
+====================
+
+2p8 8 @ 0xdeadbeef
+
+---
+
+(commands
+  (tmp_seek_command
+    (repeat_command
+      (number)
+      (arged_command (cmd_identifier)
+        (args (arg (arg_identifier)))))
+    (arg (arg_identifier))))
--- a/shlr/radare2-shell-parser/corpus/search_commands.txt
+++ b/shlr/radare2-shell-parser/corpus/search_commands.txt
@ -0,0 +1,53 @@
+=============
+Simple search
+=============
+
+/ "foo\x00"
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args (arg (double_quoted_arg)))))
+
+
+==============
+Not matching
+==============
+
+/! ff
+/!x 00
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier))))
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier)))))
+
+
+=======
+Reg exp
+=======
+
+/e /E.F/i
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier)))))
+
+
+===================
+Search with nibbles
+===================
+
+/x ff..33
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier)))))
--- a/shlr/radare2-shell-parser/corpus/simple_commands.txt
+++ b/shlr/radare2-shell-parser/corpus/simple_commands.txt
@ -0,0 +1,245 @@
+================
+No arguments
+================
+
+afl
+ afl
+
+---
+
+(commands
+  (arged_command (cmd_identifier))
+  (arged_command (cmd_identifier)))
+
+=============
+One argument
+=============
+
+af 0xdeadbeef
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+     (args (arg (arg_identifier)))))
+
+
+==============================
+Semi-colon separated commands
+==============================
+
+afl;af 0xdeadbeef  ; afl
+
+---
+
+(commands
+  (arged_command (cmd_identifier))
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier))))
+  (arged_command (cmd_identifier)))
+
+
+=========================
+Newline separate commands
+=========================
+
+afl
+af 0xdeadbeef
+
+---
+
+(commands
+  (arged_command (cmd_identifier))
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier)))))
+
+
+======================
+Question mark commands
+======================
+
+? x
+??
+???
+?t cmd
+?x hello
+?:
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier))))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier))))
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier))))
+  (arged_command (cmd_identifier)))
+
+
+===============
+Search commands
+===============
+
+/x 90
+/v4 0xdead
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier))))
+  (arged_command (cmd_identifier)
+    (args (arg (arg_identifier)))))
+
+
+========================
+Different output formats
+========================
+
+afl*
+afl+
+aflj
+
+---
+
+(commands
+  (arged_command (cmd_identifier))
+  (arged_command (cmd_identifier))
+  (arged_command (cmd_identifier)))
+
+
+=============
+Help commands
+=============
+
+?
+a?
+$?
+*?
+afl?
+(?
+=?
+/v?
+/v4?
+|?
+.?
+./?
+@?
+@@?
+@@@?
+~?
+?*
+p?*
+&?
+pf??
+pf???
+
+---
+
+(commands
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier))
+  (help_command (cmd_identifier)))
+
+
+===============================
+Command with concatenation args
+===============================
+
+?e Hello" World"'!' And All
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args
+      (arg
+	(concatenation
+	  (arg_identifier)
+	  (double_quoted_arg)
+	  (single_quoted_arg)))
+      (arg (arg_identifier))
+      (arg (arg_identifier)))))
+
+
+==============
+Number command
+==============
+
+0x8048000
+0b1001
+0xtest
+0123123
+0x123command
+
+---
+
+(commands
+  (number_command)
+  (number_command)
+  (number_command) (ERROR)
+  (number_command)
+  (number_command) (ERROR))
+
+
+=============
+Tasks command
+=============
+
+& ?e Hello
+& ?e Hello
+&& 1
+&=
+
+---
+
+(commands
+  (task_command (cmd_identifier)
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier)))))
+  (task_command (cmd_identifier)
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier)))))
+  (task_command (cmd_identifier)
+    (args (arg (arg_identifier))))
+  (task_command (cmd_identifier)))
+
+
+==============
+Arg with (...)
+==============
+
+?e Hello(World)
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args
+      (arg
+        (concatenation
+	  (arg_identifier)
+	  (arg_identifier)
+	  (args (arg (arg_identifier)))
+	  (arg_identifier))))))
--- a/shlr/radare2-shell-parser/corpus/special_commands.txt
+++ b/shlr/radare2-shell-parser/corpus/special_commands.txt
@ -0,0 +1,256 @@
+======================
+Pointer type commands
+======================
+
+*entry0
+*entry0=cc
+*entry0+10=cc
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args
+      (args (arg (arg_identifier)))))
+  (arged_command (cmd_identifier)
+    (args
+      (args (arg (arg_identifier)))
+      (arg_identifier)
+      (args (arg (arg_identifier)))))
+  (arged_command (cmd_identifier)
+    (args
+      (args (arg (arg_identifier)))
+      (arg_identifier)
+      (args (arg (arg_identifier))))))
+
+
+==============================
+Environment variable command
+==============================
+
+%
+%SHELL
+%TMPDIR=/tmp
+
+---
+
+(commands
+  (arged_command (cmd_identifier))
+  (arged_command (cmd_identifier)
+    (args
+      (args (arg (arg_identifier)))))
+  (arged_command (cmd_identifier)
+    (args
+      (args (arg (arg_identifier)))
+      (arg_identifier)
+      (args (arg (arg_identifier))))))
+
+
+===============
+Macro commands
+===============
+
+(_foo x y; p8 $0 @ $1)
+(-foo)
+(foo x y; p8 $0 @ $1)(10 0x10)
+(pdstr bits; s $0; .(pdenc guess); .(pdenc utf$0le); .(pdenc utf$0be))
+(foo; pd 10~this is special\))
+(
+(*
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (macro_args
+      (macro_content
+        (arg (arg_identifier))
+	(args
+	  (arg (arg_identifier))
+	  (arg (arg_identifier)))
+        (tmp_seek_command
+	  (arged_command (cmd_identifier)
+	    (args (arg (arg_identifier))))
+	      (arg (arg_identifier))))))
+  (arged_command (cmd_identifier)
+    (macro_args
+      (macro_content
+        (arg (arg_identifier)))))
+  (arged_command (cmd_identifier)
+    (macro_args
+      (macro_content
+        (arg (arg_identifier))
+	(args
+	  (arg (arg_identifier))
+	  (arg (arg_identifier)))
+        (tmp_seek_command
+	  (arged_command (cmd_identifier)
+	    (args (arg (arg_identifier))))
+	  (arg (arg_identifier))))
+      (macro_call_full_content
+        (macro_call_content
+	  (args
+	    (arg (arg_identifier))
+	    (arg (arg_identifier)))))))
+  (arged_command (cmd_identifier)
+    (macro_args
+      (macro_content
+        (arg (arg_identifier))
+	(args (arg (arg_identifier)))
+        (arged_command (cmd_identifier)
+	  (args (arg (arg_identifier))))
+	(arged_command (cmd_identifier)
+	  (macro_call_content
+	    (args
+	      (arg (arg_identifier))
+	      (arg (arg_identifier)))))
+	(arged_command (cmd_identifier)
+	  (macro_call_content
+	    (args
+	      (arg (arg_identifier))
+	      (arg (arg_identifier)))))
+	(arged_command (cmd_identifier)
+	  (macro_call_content
+	    (args
+	      (arg (arg_identifier))
+	      (arg (arg_identifier))))))))
+  (arged_command (cmd_identifier)
+    (macro_args
+      (macro_content
+        (arg (arg_identifier))
+	(grep_command
+	  (arged_command (cmd_identifier)
+	    (args (arg (arg_identifier))))
+	  (grep_specifier (grep_specifier_identifier))))))
+  (arged_command (cmd_identifier))
+  (arged_command (cmd_identifier)))
+
+
+===============
+System commands
+===============
+
+!
+!ls
+!!
+!!ls
+!=!
+
+---
+
+(commands
+  (arged_command (system_identifier))
+  (arged_command (system_identifier)
+    (args (arg (arg_identifier))))
+  (arged_command (system_identifier))
+  (arged_command (system_identifier)
+    (args (arg (arg_identifier))))
+  (arged_command (system_identifier)))
+
+
+===================
+Interpret r2 commands
+===================
+
+.cmd a1
+.. myfile
+. myfile.r2
+.* file
+.!rabin2 -ri $FILE
+.(foo 1 2 3)
+./ ELF
+pd 10 |.
+
+---
+
+(commands
+  (arged_command
+    command: (cmd_identifier)
+    args: (arged_command command: (cmd_identifier)
+            args: (args (arg (arg_identifier)))))
+  (arged_command
+    command: (cmd_identifier)
+    args: (args (arg (arg_identifier))))
+  (arged_command
+    command: (cmd_identifier)
+    args: (args (arg (arg_identifier))))
+  (arged_command
+    command: (cmd_identifier)
+    args: (args (arg (arg_identifier))))
+  (arged_command
+    command: (cmd_identifier)
+    args: (interpret_arg))
+  (arged_command
+    command: (cmd_identifier)
+    args: (macro_call_content
+            (args
+	      (arg (arg_identifier))
+	      (arg (arg_identifier))
+	      (arg (arg_identifier))
+	      (arg (arg_identifier)))))
+  (arged_command
+    command: (cmd_identifier)
+    args: (args (arg (arg_identifier))))
+  (arged_command
+    args: (arged_command command: (cmd_identifier)
+            args: (args (arg (arg_identifier)))))
+  )
+
+
+============
+Last cmd
+============
+
+.
+...
+
+---
+
+(commands
+  (last_command (cmd_identifier))
+  (last_command (cmd_identifier)))
+
+===================
+Interpreter commands
+===================
+
+#!
+#!python arg0
+#!rust
+#!?
+
+---
+
+(commands
+  (arged_command (cmd_identifier))
+  (arged_command (cmd_identifier)
+    (args
+      (arg (arg_identifier))
+      (arg (arg_identifier))))
+  (arged_command (cmd_identifier)
+    (args
+      (arg (arg_identifier))))
+  (help_command (cmd_identifier)))
+
+
+=======================================
+Pointer type commands with substitution
+=======================================
+
+*entr$(?e y0)=$(?v $$)
+
+---
+
+(commands
+  (arged_command (cmd_identifier)
+    (args
+      (args
+        (arg (arg_identifier))
+	(arg (cmd_substitution_arg
+	        (arged_command (cmd_identifier)
+		  (args (arg (arg_identifier)))))))
+      (arg_identifier)
+      (args
+        (arg (cmd_substitution_arg
+	        (arged_command (cmd_identifier)
+		  (args (arg (arg_identifier))))))))))
--- a/shlr/radare2-shell-parser/corpus/temporary_changes.txt
+++ b/shlr/radare2-shell-parser/corpus/temporary_changes.txt
@ -0,0 +1,263 @@
+======================
+Temporary changes help
+======================
+
+@?
+
+---
+
+(commands
+  (help_command (cmd_identifier)))
+
+==============
+Temporary seek
+==============
+
+p8 10 @ 0xdeadbeef
+p8 10 @ flag
+
+---
+
+(commands
+  (tmp_seek_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (arg (arg_identifier)))
+  (tmp_seek_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (arg (arg_identifier))))
+
+
+===================
+Temporary blocksize
+===================
+
+p8 10 @! 30
+
+---
+
+(commands
+  (tmp_blksz_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (arg (arg_identifier))))
+
+
+=================
+Temporary from/to
+=================
+
+/x 9090 @{0xbeef 0xdead}
+
+---
+
+(commands
+  (tmp_fromto_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (arg (arg_identifier))
+    (arg (arg_identifier))))
+
+
+=================
+Temporary arch
+=================
+
+pd 2 @a:x86
+
+---
+
+(commands
+  (tmp_arch_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (arg (arg_identifier))))
+
+=================
+Temporary bits
+=================
+
+pd 2 @b:16
+
+---
+
+(commands
+  (tmp_bits_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (arg (arg_identifier))))
+
+=================
+Temporary seek to nth instr
+=================
+
+pd 2 @B:3
+
+---
+
+(commands
+  (tmp_nthi_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (arg (arg_identifier))))
+
+=================
+Temporary eval
+=================
+
+pd 2 @e:scr.utf8=false
+pd 2 @e:asm.arch=x86,scr.utf8=true
+
+---
+
+(commands
+  (tmp_eval_command
+    (arged_command command: (cmd_identifier)
+      args: (args (arg (arg_identifier))))
+    (tmp_eval_args (tmp_eval_arg)))
+  (tmp_eval_command
+    (arged_command command: (cmd_identifier)
+      args: (args (arg (arg_identifier))))
+    (tmp_eval_args
+      (tmp_eval_arg)
+      (tmp_eval_arg))))
+
+=================
+Temporary flagspace
+=================
+
+f @F:symbols
+
+---
+
+(commands
+  (tmp_fs_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))))
+
+=================
+Temporary relative instruction
+=================
+
+pd 2 @i:4
+
+---
+
+(commands
+  (tmp_reli_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (arg (arg_identifier))))
+
+=================
+Temporary seek to sdb key
+=================
+
+pd @k:key
+
+---
+
+(commands
+  (tmp_kuery_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))))
+
+=================
+Temporary switch fd
+=================
+
+pd @o:3
+
+---
+
+(commands
+  (tmp_fd_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))))
+
+=================
+Temporary seek to reg value
+=================
+
+pd @r:rax
+
+---
+
+(commands
+  (tmp_reg_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))))
+
+=================
+Temporary file content
+=================
+
+pd @f:myfile
+
+---
+
+(commands
+  (tmp_file_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))))
+
+
+=================
+Temporary string content
+=================
+
+pd @s:mystring
+
+---
+
+(commands
+  (tmp_string_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))))
+
+
+=================
+Temporary hex content
+=================
+
+pd @x:90deadbeef
+
+---
+
+(commands
+  (tmp_hex_command
+    (arged_command (cmd_identifier))
+    (arg (arg_identifier))))
+
+
+=================================
+2 Temporary changes + Redirection
+=================================
+
+p8 4 @ 0xdead @a:x86 > /tmp/out.txt
+
+---
+
+(commands
+  (redirect_command
+    (tmp_arch_command
+      (tmp_seek_command
+        (arged_command (cmd_identifier)
+	  (args (arg (arg_identifier))))
+	(arg (arg_identifier)))
+      (arg (arg_identifier)))
+    (fdn_redirect_operator)
+    (arg (arg_identifier))))
+
+
+=====================
+Spaces after tmp_seek
+=====================
+wx  0x68a0a@entry0 
+---
+(commands
+  (tmp_seek_command
+    (arged_command (cmd_identifier)
+      (args (arg (arg_identifier))))
+    (arg (arg_identifier))))
--- a/shlr/radare2-shell-parser/grammar.js
+++ b/shlr/radare2-shell-parser/grammar.js
@ -0,0 +1,699 @@
+// Characters that terminate a plain argument. Every entry is spliced verbatim
+// into a negated regex character class by noneOf(), so '\\s' acts as the regex
+// whitespace escape, while the lone '\\' entry is a literal backslash.
+const SPECIAL_CHARACTERS = [
+    '\\s',
+    '@', '|', '#',
+    '"', '\'', '>',
+    ';', '$', '`',
+    '~', '\\', ',',
+    '(', ')',
+];
+
+// Same table as SPECIAL_CHARACTERS but without ',', used for pf-style arguments.
+const PF_SPECIAL_CHARACTERS = [
+    '\\s',
+    '@', '|', '#',
+    '"', '\'', '>',
+    ';', '$', '`',
+    '~', '\\', '(',
+    ')',
+];
+
+// Derived tables: each one adds the extra terminators of a specific context.
+const PF_DOT_SPECIAL_CHARACTERS = PF_SPECIAL_CHARACTERS.concat(['.', '=']);
+const SPECIAL_CHARACTERS_EQUAL = SPECIAL_CHARACTERS.concat(['=']);
+// NOTE(review): ',' is already part of SPECIAL_CHARACTERS, so this table is
+// currently equivalent to it (the comma just appears twice in the class).
+const SPECIAL_CHARACTERS_COMMA = SPECIAL_CHARACTERS.concat([',']);
+const SPECIAL_CHARACTERS_BRACE = SPECIAL_CHARACTERS.concat(['{', '}']);
+
+const ARG_IDENTIFIER_BASE = choice(
+    repeat1(noneOf(...SPECIAL_CHARACTERS)),
+    '$$$',
+    '$$',
+    '$',
+    /\$[^@|#"'>;`~\\({) ]/,
+    /\${[^\r\n $}]+}/,
+    /\\./,
+    /\/[^\*]/,
+);
+const ARG_IDENTIFIER_BRACE = choice(
+    repeat1(noneOf(...SPECIAL_CHARACTERS_BRACE)),
+    '$$$',
+    '$$',
+    '$',
+    /\$[^@|#"'>;`~\\({) ]/,
+    /\${[^\r\n $}]+}/,
+    /\\./,
+    /\/[^\*]/,
+);
+const PF_DOT_ARG_IDENTIFIER_BASE = choice(
+    repeat1(noneOf(...PF_DOT_SPECIAL_CHARACTERS)),
+    '$$$',
+    '$$',
+    '$',
+    /\$[^@|#"'>;`~\\({) ]/,
+    /\${[^\r\n $}]+}/,
+    /\\./,
+    /\/[^\*]/,
+);
+const PF_ARG_IDENTIFIER_BASE = choice(
+    repeat1(noneOf(...PF_SPECIAL_CHARACTERS)),
+    '$$$',
+    '$$',
+    '$',
+    /\$[^@|#"'>;`~\\({) ]/,
+    /\${[^\r\n $}]+}/,
+    /\\./,
+    /\/[^\*]/,
+);
+
+module.exports = grammar({
+    name: 'r2cmd',
+
+    extras: $ => [
+	$._comment,
+	/[ \t]*/,
+    ],
+
+    externals: $ => [
+	$.cmd_identifier,
+	$._help_command,
+	$.file_descriptor,
+	$._eq_sep_concat,
+	$._concat,
+	$._concat_brace,
+	$._concat_pf_dot,
+    ],
+
+    inline: $ => [
+	$.cmd_delimiter,
+	$.cmd_delimiter_singleline,
+	$._comment,
+    ],
+
+    rules: {
+	commands: $ => choice(
+	    seq(),
+	    seq(repeat($.cmd_delimiter)),
+	    seq(
+		repeat($.cmd_delimiter),
+		$._command,
+		repeat(seq($.cmd_delimiter, optional($._command)))
+	    ),
+	),
+	_commands_singleline: $ => prec(1,seq(
+	    repeat($.cmd_delimiter_singleline),
+	    $._command,
+	    repeat(seq($.cmd_delimiter_singleline, optional($._command)))
+	)),
+
+	_command: $ => choice(
+	    $.redirect_command,
+	    $._simple_command,
+	),
+
+	legacy_quoted_command: $ => seq(
+	    '"',
+	    field('string', token(prec(-1, /([^"\\]|\\(.|\n))+/))),
+	    '"',
+	),
+
+	_simple_command: $ => choice(
+	    $.help_command,
+	    $.repeat_command,
+	    $.arged_command,
+	    $.number_command,
+	    $.task_command,
+	    $._tmp_command,
+	    $._iter_command,
+	    $._foreach_command,
+	    $._pipe_command,
+	    $.grep_command,
+	    $.last_command,
+	    $.legacy_quoted_command,
+	    $._pf_commands,
+	),
+
+	_tmp_command: $ => choice(
+	    $.tmp_seek_command,
+	    $.tmp_blksz_command,
+	    $.tmp_fromto_command,
+	    $.tmp_arch_command,
+	    $.tmp_bits_command,
+	    $.tmp_nthi_command,
+	    $.tmp_eval_command,
+	    $.tmp_fs_command,
+	    $.tmp_reli_command,
+	    $.tmp_kuery_command,
+	    $.tmp_fd_command,
+	    $.tmp_reg_command,
+	    $.tmp_file_command,
+	    $.tmp_string_command,
+	    $.tmp_hex_command,
+	),
+
+	_iter_command: $ => choice(
+	    $.iter_flags_command,
+	    $.iter_dbta_command,
+	    $.iter_dbtb_command,
+	    $.iter_dbts_command,
+	    $.iter_file_lines_command,
+	    $.iter_offsets_command,
+	    $.iter_sdbquery_command,
+	    $.iter_threads_command,
+	    $.iter_bbs_command,
+	    $.iter_instrs_command,
+	    $.iter_sections_command,
+	    $.iter_functions_command,
+	    $.iter_step_command,
+	    $.iter_interpret_command,
+	    $.iter_hit_command,
+	),
+
+	_foreach_command: $ => choice(
+	    $.foreach_addrsize_command,
+	    $.foreach_bb_command,
+	    $.foreach_cmd_command,
+	    $.foreach_comment_command,
+	    $.foreach_import_command,
+	    $.foreach_register_command,
+	    $.foreach_symbol_command,
+	    $.foreach_string_command,
+	    $.foreach_section_command,
+	    $.foreach_iomap_command,
+	    $.foreach_dbgmap_command,
+	    $.foreach_flag_command,
+	    $.foreach_function_command,
+	    $.foreach_thread_command,
+	),
+
+	_pipe_command: $ => choice(
+	    $.html_disable_command,
+	    $.html_enable_command,
+	    $.pipe_command,
+	    $.scr_tts_command,
+	),
+
+	grep_command: $ => seq(
+	    field('command', $._simple_command),
+	    '~',
+	    field('specifier', $.grep_specifier),
+	),
+	// FIXME: improve parser for grep specifier
+	// grep_specifier_identifier also includes ~ because r2 does not support nested grep commands yet
+	grep_specifier_identifier: $ => token(seq(repeat1(
+	    choice(
+		/[^\n\r;#@>|`$()]+/,
+		/\\./,
+		/\$[^(\r\n;#>|`]/,
+	    )
+	))),
+	grep_specifier: $ => prec.left(choice(
+	    seq(
+		repeat1(
+		    choice(
+			$.grep_specifier_identifier,
+			$.cmd_substitution_arg,
+		    ),
+		),
+		optional(alias(/[$]+/, $.grep_specifier_identifier)),
+	    ),
+	    alias(/[$]+/, $.grep_specifier_identifier),
+	)),
+
+	html_disable_command: $ => prec.right(1, seq(
+	    field('command', $._simple_command),
+	    '|'
+	)),
+	html_enable_command: $ => prec.right(1, seq(
+	    field('command', $._simple_command),
+	    '|H'
+	)),
+	scr_tts_command: $ => prec.right(1, seq(
+	    field('command', $._simple_command),
+	    '|T'
+	)),
+	pipe_command: $ => seq($._simple_command, '|', $.pipe_second_command),
+	pipe_second_command: $ => /[^|\r\n;]+/,
+
+	foreach_addrsize_command: $ => prec.right(1, seq($._simple_command, '@@@=', repeat1(seq($.arg, $.arg)))),
+	foreach_bb_command: $ => prec.right(1, seq($._simple_command, '@@@b')),
+	foreach_cmd_command: $ => prec.right(1, seq($._simple_command, '@@@c:', $._simple_command)),
+	foreach_comment_command: $ => prec.right(1, seq($._simple_command, '@@@C:', $.arg)),
+	foreach_import_command: $ => prec.right(1, seq($._simple_command, '@@@i')),
+	foreach_register_command: $ => prec.right(1, seq($._simple_command, '@@@r')),
+	foreach_symbol_command: $ => prec.right(1, seq($._simple_command, '@@@s')),
+	foreach_string_command: $ => prec.right(1, seq($._simple_command, '@@@st')),
+	foreach_section_command: $ => prec.right(1, seq($._simple_command, '@@@S')),
+	foreach_iomap_command: $ => prec.right(1, seq($._simple_command, '@@@m')),
+	foreach_dbgmap_command: $ => prec.right(1, seq($._simple_command, '@@@M')),
+	foreach_flag_command: $ => prec.right(1,
+	    choice(
+		seq($._simple_command, '@@@f'),
+		seq($._simple_command, '@@@f:', $.arg),
+	    ),
+	),
+	foreach_function_command: $ => prec.right(1,
+	    choice(
+		seq($._simple_command, '@@@F'),
+		seq($._simple_command, '@@@F:', $.arg)
+	    )
+	),
+	foreach_thread_command: $ => prec.right(1, seq($._simple_command, '@@@t')),
+
+	iter_flags_command: $ => prec.right(1, seq($._simple_command, '@@', $.arg)),
+	iter_dbta_command: $ => prec.right(1, seq($._simple_command, choice('@@dbt', '@@dbta'))),
+	iter_dbtb_command: $ => prec.right(1, seq($._simple_command, '@@dbtb')),
+	iter_dbts_command: $ => prec.right(1, seq($._simple_command, '@@dbts')),
+	iter_file_lines_command: $ => prec.right(1, seq($._simple_command, '@@.', $.arg)),
+	iter_offsets_command: $ => prec.right(1, seq($._simple_command, '@@=', optional($.args))),
+	iter_sdbquery_command: $ => prec.right(1, seq($._simple_command, '@@k', $.arg)),
+	iter_threads_command: $ => prec.right(1, seq($._simple_command, '@@t')),
+	iter_bbs_command: $ => prec.right(1, seq($._simple_command, '@@b')),
+	iter_instrs_command: $ => prec.right(1, seq($._simple_command, '@@i')),
+	iter_sections_command: $ => prec.right(1, seq($._simple_command, '@@iS')),
+	iter_functions_command: $ => prec.right(1, seq($._simple_command, '@@f', optional(seq(':', $.arg)))),
+	iter_step_command: $ => prec.right(1, seq($._simple_command, '@@s:', $.arg, $.arg, $.arg)),
+	iter_interpret_command: $ => prec.right(1, seq($._simple_command, '@@c:', $._simple_command)),
+	iter_hit_command: $ => prec.right(1, seq(
+	    $._simple_command,
+	    '@@',
+	    $._concat,
+	    alias($._search_command, $.arged_command)
+	)),
+
+	// tmp changes commands
+	tmp_seek_command: $ => prec.right(1, seq($._simple_command, '@', $.arg)),
+	tmp_blksz_command: $ => prec.right(1, seq($._simple_command, '@!', $.arg)),
+	// NOTE: need to use special arg_brace here because of https://github.com/radareorg/radare2/commit/c3dee9332c19f874ac2cc9294a9ffe17575d8141
+	tmp_fromto_command: $ => prec.right(1, seq(
+	    $._simple_command,
+	    '@{',
+	    alias($.arg_brace, $.arg),
+	    alias($.arg_brace, $.arg),
+	    '}'
+	)),
+	tmp_arch_command: $ => prec.right(1, seq($._simple_command, '@a:', $.arg)),
+	tmp_bits_command: $ => prec.right(1, seq($._simple_command, '@b:', $.arg)),
+	tmp_nthi_command: $ => prec.right(1, seq($._simple_command, '@B:', $.arg)),
+	tmp_eval_command: $ => prec.right(1, seq($._simple_command, '@e:', $.tmp_eval_args)),
+	tmp_fs_command: $ => prec.right(1, seq($._simple_command, '@F:', $.arg)),
+	tmp_reli_command: $ => prec.right(1, seq($._simple_command, '@i:', $.arg)),
+	tmp_kuery_command: $ => prec.right(1, seq($._simple_command, '@k:', $.arg)),
+	tmp_fd_command: $ => prec.right(1, seq($._simple_command, '@o:', $.arg)),
+	tmp_reg_command: $ => prec.right(1, seq($._simple_command, '@r:', $.arg)),
+	tmp_file_command: $ => prec.right(1, seq($._simple_command, '@f:', $.arg)),
+	tmp_string_command: $ => prec.right(1, seq($._simple_command, '@s:', $.arg)),
+	tmp_hex_command: $ => prec.right(1, seq($._simple_command, '@x:', $.arg)),
+
+	_interpreter_command: $ => prec.right(1, seq(
+	    field('command', alias('#!', $.cmd_identifier)),
+	    field('args', optional($.args)),
+	)),
+
+	// basic commands
+	task_command: $ => prec.left(1, choice(
+	    seq(
+		field('command', alias(choice('&', '&t'), $.cmd_identifier)),
+		field('args', $._simple_command),
+	    ),
+	    seq(
+		field('command', alias(/&[A-Za-z=\-+*&0-9]*/, $.cmd_identifier)),
+		field('args', optional($.args)),
+	    ),
+	)),
+	number_command: $ => choice(
+	    $._dec_number,
+	    '0',
+	    /(0x[0-9A-Fa-f]+|0b[0-1]+)/,
+	),
+	help_command: $ => prec.left(1, choice(
+	    field('command', alias($.question_mark_identifier, $.cmd_identifier)),
+	    seq(
+		field('command', alias(choice($._help_command, '#?', '#!?'), $.cmd_identifier)),
+		field('args', optional($.args)),
+	    ),
+	)),
+	arged_command: $ => choice(
+	    $._simple_arged_command,
+	    $._math_arged_command,
+	    $._pointer_arged_command,
+	    $._macro_arged_command,
+	    $._system_command,
+	    $._interpret_command,
+	    $._env_command,
+	    $._interpreter_command,
+	    $._pf_arged_command,
+	),
+
+	_simple_arged_command: $ => prec.left(1, seq(
+	    field('command', $.cmd_identifier),
+	    field('args', optional($.args)),
+	)),
+	_search_command: $ => prec.left(1, seq(
+	    field('command', alias(/\/[A-Za-z0-9+!\/*]*/, $.cmd_identifier)),
+	    field('args', optional($.args)),
+	)),
+	_math_arged_command: $ => prec.left(1, seq(
+	    field('command', alias($.question_mark_identifier, $.cmd_identifier)),
+	    field('args', $.args),
+	)),
+	_pointer_arged_command: $ => prec.left(1, seq(
+	    field('command', alias($.pointer_identifier, $.cmd_identifier)),
+	    field('args', alias($.eq_sep_args, $.args)),
+	)),
+	_macro_arged_command: $ => prec.left(1, seq(
+	    field('command', alias($.macro_identifier, $.cmd_identifier)),
+	    field('args', optional($.macro_args)),
+	)),
+	_system_command: $ => prec.left(1, seq(
+	    field('command', $.system_identifier),
+	    optional(field('args', $.args)),
+	)),
+	_interpret_command: $ => prec.left(1, choice(
+	    seq(
+		field('command', alias('.', $.cmd_identifier)),
+		field('args', $._simple_command),
+	    ),
+	    seq(
+		field('command', alias($._interpret_identifier, $.cmd_identifier)),
+		field('args', optional($.args)),
+	    ),
+	    seq(
+		field('command', alias('.!', $.cmd_identifier)),
+		field('args', $.interpret_arg),
+	    ),
+	    seq(
+		field('command', alias('.(', $.cmd_identifier)),
+		field('args', $.macro_call_content),
+	    ),
+	    seq(
+		field('command', alias($._interpret_search_identifier, $.cmd_identifier)),
+		field('args', $.args),
+	    ),
+	    prec.right(1, seq(
+		field('args', $._simple_command),
+		field('command', '|.'),
+	    )),
+	)),
+	_interpret_search_identifier: $ => seq('./'),
+	_pf_arged_command: $ => choice(
+	    seq(
+		field('command', alias($.pf_dot_cmd_identifier, $.cmd_identifier)),
+	    ),
+	    seq(
+		field('command', alias('pfo', $.cmd_identifier)),
+		field('args', $.args),
+	    ),
+	),
+	_pf_commands: $ => prec.left(1, choice(
+	    // pf fmt, pf* fmt_name|fmt, pfc fmt_name|fmt, pfd.fmt_name, pfj fmt_name|fmt, pfq fmt, pfs.struct_name, pfs format
+	    alias($.pf_cmd, $.arged_command),
+	    // pf.fmt_name.field_name, pf.fmt_name.field_name[i], pf.fmt_name.field_name=33, pfv.fmt_name[.field]
+	    alias($.pf_dot_cmd, $.arged_command),
+	    // pf.name [0|cnt]fmt
+	    alias($.pf_new_cmd, $.arged_command),
+	    // Cf [sz] [fmt]
+	    alias($.Cf_cmd, $.arged_command),
+	    // pf., pfo fdf_name: will be handled as regular arged_command
+	)),
+	Cf_cmd: $ => prec.left(seq(
+	    field('command', alias('Cf', $.cmd_identifier)),
+	    optional(field('args', alias($._Cf_args, $.args))),
+	)),
+	_Cf_args: $ => seq(
+	    $.arg,
+	    $.pf_args,
+	),
+	pf_dot_cmd_identifier: $ => 'pf.',
+	pf_dot_full_cmd_identifier: $ => /pf[*cjqsv]\./,
+	pf_new_cmd: $ => seq(
+	    field('command', alias($.pf_dot_cmd_identifier, $.cmd_identifier)),
+	    $._concat_pf_dot,
+	    field('args', $.pf_new_args),
+	),
+	pf_dot_cmd: $ => prec.left(1, seq(
+	    field('command', alias(choice($.pf_dot_cmd_identifier, $.pf_dot_full_cmd_identifier), $.cmd_identifier)),
+	    $._concat_pf_dot,
+	    field('args', $.pf_dot_cmd_args),
+	)),
+	pf_cmd: $ => seq(
+	    field('command', alias(/pf[*cjqs]?/, $.cmd_identifier)),
+	    field('args', $.pf_args),
+	),
+	pf_new_args: $ => seq(
+	    alias($.pf_dot_arg, $.pf_arg),
+	    $.pf_args,
+	),
+	pf_dot_cmd_args: $ => seq(
+	    alias($.pf_dot_args, $.pf_args),
+	    optional(seq(
+		alias('=', $.pf_arg_identifier),
+		$.pf_args,
+	    )),
+	),
+	_pf_dot_arg_identifier: $ => token(seq(
+	    repeat1(PF_DOT_ARG_IDENTIFIER_BASE),
+	)),
+	_pf_arg_parentheses: $ => seq(
+	    alias('(', $.pf_arg_identifier),
+	    $.pf_args,
+	    alias(')', $.pf_arg_identifier),
+	),
+	pf_arg_identifier: $ => token(seq(
+	    repeat1(PF_ARG_IDENTIFIER_BASE),
+	)),
+	_pf_arg: $ => choice(
+	    $.pf_arg_identifier,
+	    $._pf_arg_parentheses,
+	    $.cmd_substitution_arg,
+	),
+	_pf_dot_arg: $ => choice(
+	    alias($._pf_dot_arg_identifier, $.pf_arg_identifier),
+	    $.cmd_substitution_arg,
+	),
+	pf_concatenation: $ => prec(-1, seq(
+	    $._pf_arg,
+	    repeat1(prec(-1, seq(
+		$._concat,
+		$._pf_arg,
+	    ))),
+	)),
+	pf_dot_concatenation: $ => prec(-1, seq(
+	    $._pf_dot_arg,
+	    repeat1(prec(-1, seq(
+		$._concat_pf_dot,
+		$._pf_dot_arg,
+	    ))),
+	)),
+	pf_arg: $ => choice(
+	    $._pf_arg,
+	    $.pf_concatenation
+	),
+	pf_dot_arg: $ => choice(
+	    $._pf_dot_arg,
+	    alias($.pf_dot_concatenation, $.pf_concatenation),
+	),
+	pf_args: $ => prec.left(repeat1($.pf_arg)),
+	pf_dot_args: $ => prec.left(1, seq(
+	    alias($.pf_dot_arg, $.pf_arg),
+	    repeat(seq(
+		$._concat_pf_dot,
+		'.',
+		$._concat_pf_dot,
+		alias($.pf_dot_arg, $.pf_arg),
+	    )),
+	)),
+	_env_command: $ => prec.left(seq(
+	    field('command', alias($._env_command_identifier, $.cmd_identifier)),
+	    field('args', optional(alias($.eq_sep_args, $.args))),
+	)),
+	_env_command_identifier: $ => choice('%', 'env'),
+	last_command: $ => seq(
+	    field('command', alias($.last_command_identifier, $.cmd_identifier)),
+	),
+
+	last_command_identifier: $ => choice('.', '...'),
+	_interpret_identifier: $ => prec(1, choice(
+	    /\.[\.:\-*]+[ ]*/,
+	    /\.[ ]+/,
+	)),
+	interpret_arg: $ => $._any_command,
+	system_identifier: $ => /![\*!-=]*/,
+	question_mark_identifier: $ => '?',
+
+	repeat_command: $ => prec.left(1, seq(
+	    field('arg', alias($._dec_number, $.number)),
+	    field('command', $._simple_command),
+	)),
+
+	pointer_identifier: $ => '*',
+	eq_sep_args: $ => seq(
+	    alias($.eq_sep_key, $.args),
+	    optional(seq(
+		alias('=', $.arg_identifier),
+		alias($.eq_sep_val, $.args)
+	    )),
+	),
+	macro_identifier: $ => /\([-\*]?/,
+	macro_call_content: $ => prec.left(seq(
+	    optional($.args),
+	    ')',
+	)),
+	macro_call_full_content: $ => seq('(', $.macro_call_content),
+	macro_content: $ => prec(1, seq(
+	    field('name', $.arg),
+	    optional($.args),
+	    optional(seq(
+		';',
+		$._command,
+		repeat(seq(';', $._command)),
+	    )),
+	    ')',
+	)),
+	macro_args: $ => seq(
+	    $.macro_content,
+	    optional(
+		seq(
+		    optional($.macro_call_full_content),
+		)
+	    ),
+	),
+
+	redirect_command: $ => prec.right(2, seq(
+	    field('command', $._simple_command),
+	    field('redirect_operator', $._redirect_operator),
+	    field('arg', $.arg),
+	)),
+	_redirect_operator: $ => choice(
+	    $.fdn_redirect_operator,
+	    $.fdn_append_operator,
+	    $.html_redirect_operator,
+	    $.html_append_operator,
+	),
+	fdn_redirect_operator: $ => seq(optional($.file_descriptor), '>'),
+	fdn_append_operator: $ => seq(optional($.file_descriptor), '>>'),
+	html_redirect_operator: $ => 'H>',
+	html_append_operator: $ => 'H>>',
+
+	_arg: $ => choice(
+	    $.arg_identifier,
+	    $.double_quoted_arg,
+	    $.single_quoted_arg,
+	    $.cmd_substitution_arg,
+	    seq(
+		alias('(', $.arg_identifier),
+		$.args,
+		alias(')', $.arg_identifier),
+	    ),
+	    alias(',', $.arg_identifier),
+	),
+	_arg_brace: $ => choice(
+	    alias($.arg_identifier_brace, $.arg_identifier),
+	    $.double_quoted_arg,
+	    $.single_quoted_arg,
+	    $.cmd_substitution_arg,
+	    seq(
+		alias('(', $.arg_identifier),
+		$._arg_brace,
+		alias(')', $.arg_identifier),
+	    ),
+	    alias(',', $.arg_identifier),
+	),
+	arg: $ => choice(
+	    $._arg,
+	    $.concatenation,
+	),
+	arg_brace: $ => choice(
+	    $._arg_brace,
+	    alias($.concatenation_brace, $.concatenation),
+	),
+	args: $ => prec.left(repeat1($.arg)),
+	// TODO: this should accept a quoted_arg and a cmd_substitution_arg as well
+	tmp_eval_args: $ => prec.left(seq($.tmp_eval_arg, repeat(seq(',', $.tmp_eval_arg)))),
+	tmp_eval_arg: $ => repeat1(noneOf(...SPECIAL_CHARACTERS_COMMA)),
+
+	_eq_sep_key_single: $ => choice(
+	    alias ($._eq_sep_key_identifier, $.arg_identifier),
+	    $.double_quoted_arg,
+	    $.single_quoted_arg,
+	    $.cmd_substitution_arg,
+	),
+	eq_sep_key: $ => prec.left(seq(
+	    alias($._eq_sep_key_single, $.arg),
+	    repeat(seq(
+		$._eq_sep_concat,
+		alias($._eq_sep_key_single, $.arg),
+	    )),
+	)),
+	_eq_sep_key_identifier: $ => token(repeat1(
+	    choice(
+		repeat1(noneOf(...SPECIAL_CHARACTERS_EQUAL)),
+		/\$[^({]/,
+		/\${[^\r\n $}]+}/,
+		escape(...SPECIAL_CHARACTERS_EQUAL),
+	    )
+	)),
+	eq_sep_val: $ => prec.left(seq(
+	    $.arg,
+	    repeat(seq(
+		$._eq_sep_concat,
+		$.arg,
+	    )),
+	)),
+	_any_command: $ => /[^\r\n;~|]+/,
+
+	arg_identifier: $ => token(repeat1(ARG_IDENTIFIER_BASE)),
+	arg_identifier_brace: $ => token(repeat1(ARG_IDENTIFIER_BRACE)),
+	double_quoted_arg: $ => seq(
+	    '"',
+	    repeat(choice(
+		/[^\\"\n$`]+/,
+		/\$[^("]?/,
+		/\\[\\"\n$`]?/,
+		$.cmd_substitution_arg,
+	    )),
+	    '"',
+	),
+	single_quoted_arg: $ => seq(
+	    '\'',
+	    repeat(choice(
+		/[^\\'\n]+/,
+		/\\[\\'\n]?/,
+	    )),
+	    '\'',
+	),
+	cmd_substitution_arg: $ => choice(
+	    seq('$(', $._commands_singleline, ')'),
+	    prec(1, seq('`', $._commands_singleline, '`')),
+	),
+	concatenation: $ => prec(-1, seq(
+	    $._arg,
+	    repeat1(prec(-1, seq(
+		$._concat,
+		$._arg,
+	    ))),
+	)),
+	concatenation_brace: $ => prec(-1, seq(
+	    $._arg_brace,
+	    repeat1(prec(-1, seq(
+		$._concat_brace,
+		$._arg_brace,
+	    ))),
+	)),
+
+	_dec_number: $ => choice(/[1-9][0-9]*/, /[0-9][0-9]+/),
+	_comment: $ => token(choice(
+	    '#',
+	    /#[^!][^\r\n]*/,
+	    seq('/*', /[^*]*\*+([^/*][^*]*\*+)*/, '/')
+	)),
+
+	cmd_delimiter: $ => choice(
+	    '\n',
+	    '\r',
+	    $.cmd_delimiter_singleline,
+	),
+	cmd_delimiter_singleline: $ => choice(';'),
+    }
+});
+
+function noneOf(...characters) {
+    const negatedString = characters.map(c => c == '\\' ? '\\\\' : c).join('')
+    return new RegExp('[^' + negatedString + ']')
+}
--- a/shlr/radare2-shell-parser/index.js
+++ b/shlr/radare2-shell-parser/index.js
@ -0,0 +1,13 @@
+// Load the compiled native binding, trying the Release build first.
+try {
+  module.exports = require("./build/Release/tree_sitter_r2cmd_binding");
+} catch (error) {
+  // Fall back to the Debug build. If that fails as well, rethrow the original
+  // (Release) error rather than the Debug one.
+  try {
+    module.exports = require("./build/Debug/tree_sitter_r2cmd_binding");
+  } catch (_) {
+    throw error
+  }
+}
+
+// Attach the node type metadata when it can be loaded; any failure here is
+// deliberately ignored.
+try {
+  module.exports.nodeTypeInfo = require("./src/node-types.json");
+} catch (_) {}
--- a/shlr/radare2-shell-parser/package-lock.json
+++ b/shlr/radare2-shell-parser/package-lock.json
@ -0,0 +1,19 @@
+{
+  "name": "tree-sitter-r2cmd",
+  "version": "1.0.0",
+  "lockfileVersion": 1,
+  "requires": true,
+  "dependencies": {
+    "nan": {
+      "version": "2.14.0",
+      "resolved": "https://registry.npmjs.org/nan/-/nan-2.14.0.tgz",
+      "integrity": "sha512-INOFj37C7k3AfaNTtX8RhsTw7qRy7eLET14cROi9+5HAVbbHuIWUHEauBv5qT4Av2tWasiTY1Jw6puUNqRJXQg=="
+    },
+    "tree-sitter-cli": {
+      "version": "0.16.4",
+      "resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.16.4.tgz",
+      "integrity": "sha512-akCVeK7oOZD+frizRbBx3h6OBlVBxOCNtfpt9nz3zvOdRuJTwoyJUshzF28J+hfcuvQ+yfoZx9/R+2S7NZE2TA==",
+      "dev": true
+    }
+  }
+}
--- a/shlr/radare2-shell-parser/package.json
+++ b/shlr/radare2-shell-parser/package.json
@ -0,0 +1,17 @@
+{
+  "name": "tree-sitter-r2cmd",
+  "version": "1.0.0",
+  "description": "Tree-Sitter grammar for parsing radare2 commands",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "author": "Riccardo Schirone",
+  "license": "ISC",
+  "dependencies": {
+    "nan": "^2.14.0"
+  },
+  "devDependencies": {
+    "tree-sitter-cli": "^0.16.4"
+  }
+}
--- a/shlr/radare2-shell-parser/src/binding.cc
+++ b/shlr/radare2-shell-parser/src/binding.cc
@ -0,0 +1,28 @@
+#include "tree_sitter/parser.h"
+#include <node.h>
+#include "nan.h"
+
+using namespace v8;
+
+extern "C" TSLanguage * tree_sitter_r2cmd();
+
+namespace {
+
+// Constructor callback for the "Language" function template; it does nothing.
+NAN_METHOD(New) {}
+
+// Module initializer. Creates a "Language" object with one internal field,
+// stores the TSLanguage pointer returned by tree_sitter_r2cmd() in that field,
+// tags the object with name = "r2cmd" and installs it as `module.exports`.
+void Init(Local<Object> exports, Local<Object> module) {
+  Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New);
+  tpl->SetClassName(Nan::New("Language").ToLocalChecked());
+  tpl->InstanceTemplate()->SetInternalFieldCount(1);
+
+  Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked();
+  Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
+  Nan::SetInternalFieldPointer(instance, 0, tree_sitter_r2cmd());
+
+  Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("r2cmd").ToLocalChecked());
+  Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance);
+}
+
+// Registers Init as the entry point of the tree_sitter_r2cmd_binding addon.
+NODE_MODULE(tree_sitter_r2cmd_binding, Init)
+
+}  // namespace
--- a/shlr/radare2-shell-parser/src/grammar.json
+++ b/shlr/radare2-shell-parser/src/grammar.json
--- a/shlr/radare2-shell-parser/src/node-types.json
+++ b/shlr/radare2-shell-parser/src/node-types.json
--- a/shlr/radare2-shell-parser/src/parser.c
+++ b/shlr/radare2-shell-parser/src/parser.c
--- a/shlr/radare2-shell-parser/src/scanner.c
+++ b/shlr/radare2-shell-parser/src/scanner.c
@ -0,0 +1,169 @@
+#include <tree_sitter/parser.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#define CMD_IDENTIFIER_MAX_LENGTH 32
+
+enum TokenType {
+	CMD_IDENTIFIER,
+	HELP_COMMAND,
+	FILE_DESCRIPTOR,
+	EQ_SEP_CONCAT,
+	CONCAT,
+	CONCAT_BRACE,
+	CONCAT_PF_DOT,
+};
+
+// Creates the scanner payload. No state is allocated: the payload is NULL.
+void *tree_sitter_r2cmd_external_scanner_create() {
+	return NULL;
+}
+
+// Releases the scanner payload. Nothing to do: the body is intentionally empty.
+void tree_sitter_r2cmd_external_scanner_destroy(void *payload) {
+}
+
+// Serializes the scanner state into buffer. Nothing is written, so the
+// reported length is always 0.
+unsigned tree_sitter_r2cmd_external_scanner_serialize(void *payload, char *buffer) {
+	return 0;
+}
+
+// Restores the scanner state from buffer. The input is ignored: the body is
+// intentionally empty.
+void tree_sitter_r2cmd_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
+}
+
+// True when s starts with "pf" or is exactly "Cf".
+static bool is_pf_cmd(const char *s) {
+	const bool has_pf_prefix = strncmp (s, "pf", 2) == 0;
+	const bool is_cf = strcmp (s, "Cf") == 0;
+	return has_pf_prefix || is_cf;
+}
+
+// True when s starts with "env".
+static bool is_env_cmd(const char *s) {
+	return strncmp (s, "env", 3) == 0;
+}
+
+// True when the first character of s is '@'.
+static bool is_at_cmd(const char *s) {
+	return *s == '@';
+}
+
+// True when s begins with the two characters "/*".
+static bool is_comment(const char *s) {
+	// s[1] is only read when s[0] is '/', so an empty string is safe
+	return s[0] == '/' && s[1] == '*';
+}
+
+// True when ch is one of the characters that start a command but are not
+// reported as a regular command identifier by the scanner:
+// '*', '(', '@', '|', '.', '%', '~' and '&'.
+static bool is_special_start(const int32_t ch) {
+	switch (ch) {
+	case '*':
+	case '(':
+	case '@':
+	case '|':
+	case '.':
+	case '%':
+	case '~':
+	case '&':
+		return true;
+	default:
+		return false;
+	}
+}
+
+// True when ch may be the first character of a command: an ASCII letter, one
+// of '$', '?', ':', '+', '=', '/', '_', or a special start character.
+static bool is_start_of_command(const int32_t ch) {
+	// ch is a full 32-bit lookahead value; isalpha() is only defined for
+	// values representable as unsigned char (or EOF), so restrict it to ASCII.
+	const bool is_letter = ch >= 0 && ch <= 0x7f && isalpha (ch);
+	return is_letter || ch == '$' || ch == '?' || ch == ':' || ch == '+' ||
+		ch == '=' || ch == '/' || ch == '_' || is_special_start (ch);
+}
+
+// True when ch may continue the command identifier collected so far in res.
+// '@' is only accepted while the identifier itself starts with '@'.
+static bool is_mid_command(const char *res, const int32_t ch) {
+	// ch is a full 32-bit lookahead value; isalnum() is only defined for
+	// values representable as unsigned char (or EOF), so restrict it to ASCII.
+	const bool is_alnum = ch >= 0 && ch <= 0x7f && isalnum (ch);
+	return is_alnum || ch == '$' || ch == '?' || ch == '.' || ch == '!' ||
+		ch == ':' || ch == '+' || ch == '=' || ch == '/' || ch == '*' ||
+		ch == '-' || ch == ',' || ch == '&' || (is_at_cmd (res) && ch == '@');
+}
+
+// True when ch, seen right after an argument, means another piece is glued to
+// it: anything except NUL, whitespace and the characters
+// '#', '@', '|', '>', ';', ')', '`', '~' and '\\'.
+static bool is_concat(const int32_t ch) {
+	// ch is a full 32-bit lookahead value; isspace() is only defined for
+	// values representable as unsigned char (or EOF), so restrict it to ASCII.
+	const bool is_space = ch >= 0 && ch <= 0x7f && isspace (ch);
+	return ch != '\0' && !is_space && ch != '#' && ch != '@' &&
+		ch != '|' && ch != '>' && ch != ';' &&
+		ch != ')' && ch != '`' && ch != '~' && ch != '\\';
+}
+
+// Same as is_concat(), but '{' and '}' also stop the concatenation.
+static bool is_concat_brace(const int32_t ch) {
+	if (!is_concat (ch)) {
+		return false;
+	}
+	return !(ch == '}' || ch == '{');
+}
+
+// Same as is_concat(), but '=' also stops the concatenation.
+static bool is_concat_pf_dot(const int32_t ch) {
+	if (!is_concat (ch)) {
+		return false;
+	}
+	return ch != '=';
+}
+
+// True when an identifier of id_len characters ends with "?*".
+// before_last_ch and last_ch are its last two characters.
+static bool is_recursive_help(int id_len, const int32_t before_last_ch, const int32_t last_ch) {
+	if (id_len < 2) {
+		return false;
+	}
+	return before_last_ch == '?' && last_ch == '*';
+}
+
+// Recognizes a FILE_DESCRIPTOR token: a run of decimal digits that is
+// immediately followed by '>'. Leading whitespace is skipped, and the '>'
+// itself is not consumed.
+//
+// Returns true with lexer->result_symbol set to FILE_DESCRIPTOR on success,
+// false when the token is not valid here or the input does not match.
+static bool scan_number(TSLexer *lexer, const bool *valid_symbols) {
+	if (!valid_symbols[FILE_DESCRIPTOR]) {
+		return false;
+	}
+
+	// skip spaces at the beginning; lookahead is a full 32-bit value and
+	// isspace() is only defined for unsigned char/EOF, so restrict it to ASCII
+	while (lexer->lookahead >= 0 && lexer->lookahead <= 0x7f && isspace (lexer->lookahead)) {
+		lexer->advance (lexer, true);
+	}
+
+	// at least one digit is required
+	if (lexer->lookahead < '0' || lexer->lookahead > '9') {
+		return false;
+	}
+	do {
+		lexer->advance (lexer, false);
+	} while (lexer->lookahead >= '0' && lexer->lookahead <= '9');
+
+	// the digits only count as a file descriptor when '>' comes right after
+	if (lexer->lookahead != '>') {
+		return false;
+	}
+	lexer->result_symbol = FILE_DESCRIPTOR;
+	return true;
+}
+
+// External scanner entry point. The checks run in this order:
+//   1. EQ_SEP_CONCAT / CONCAT / CONCAT_BRACE / CONCAT_PF_DOT: decided from the
+//      next character alone and returned without consuming any input.
+//   2. CMD_IDENTIFIER / HELP_COMMAND: skips leading whitespace, then collects
+//      up to CMD_IDENTIFIER_MAX_LENGTH identifier characters. Identifiers
+//      ending in '?' or "?*" become HELP_COMMAND (a lone "?" is rejected);
+//      everything else becomes CMD_IDENTIFIER unless it is a comment start, a
+//      special-start, pf, env or '@' command.
+//   3. FILE_DESCRIPTOR: delegated to scan_number().
+//
+// Returns true when a token was recognized, with lexer->result_symbol set.
+bool tree_sitter_r2cmd_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
+	// FIXME: /* in the shell should become a multiline comment
+
+	// lookahead is a full 32-bit value; isspace() is only defined for values
+	// representable as unsigned char (or EOF), so restrict it to ASCII.
+	const int32_t first = lexer->lookahead;
+	const bool first_is_space = first >= 0 && first <= 0x7f && isspace (first);
+
+	if (valid_symbols[EQ_SEP_CONCAT] && !first_is_space && first != '=' && first != '\0') {
+		lexer->result_symbol = EQ_SEP_CONCAT;
+		return true;
+	}
+	if (valid_symbols[CONCAT] && is_concat (lexer->lookahead)) {
+		lexer->result_symbol = CONCAT;
+		return true;
+	} else if (valid_symbols[CONCAT_BRACE] && is_concat_brace (lexer->lookahead)) {
+		lexer->result_symbol = CONCAT_BRACE;
+		return true;
+	} else if (valid_symbols[CONCAT_PF_DOT] && is_concat_pf_dot (lexer->lookahead)) {
+		lexer->result_symbol = CONCAT_PF_DOT;
+		return true;
+	}
+	if (valid_symbols[CMD_IDENTIFIER] || valid_symbols[HELP_COMMAND]) {
+		char res[CMD_IDENTIFIER_MAX_LENGTH + 1];
+		int i_res = 0;
+
+		// leading whitespace is skipped and not made part of the token
+		while (lexer->lookahead >= 0 && lexer->lookahead <= 0x7f && isspace (lexer->lookahead)) {
+			lexer->advance (lexer, true);
+		}
+
+		if (!is_start_of_command (lexer->lookahead)) {
+			return false;
+		}
+		res[i_res++] = lexer->lookahead;
+		if (res[0] == '#') {
+			return false;
+		}
+		lexer->advance (lexer, false);
+		while (i_res < CMD_IDENTIFIER_MAX_LENGTH && is_mid_command (res, lexer->lookahead)) {
+			res[i_res++] = lexer->lookahead;
+			lexer->advance (lexer, false);
+		}
+		res[i_res] = '\0';
+		if (is_comment (res)) {
+			return false;
+		}
+		if (res[i_res - 1] == '?' || (i_res >= 2 && is_recursive_help(i_res, res[i_res - 2], res[i_res - 1]))) {
+			// a lone "?" is not a help command
+			if (i_res == 1) {
+				return false;
+			}
+			lexer->result_symbol = HELP_COMMAND;
+		} else {
+			if (is_special_start (res[0]) || is_pf_cmd (res) || is_env_cmd (res) || is_at_cmd (res) || !valid_symbols[CMD_IDENTIFIER]) {
+				return false;
+			}
+			lexer->result_symbol = CMD_IDENTIFIER;
+		}
+		return true;
+	}
+	if (valid_symbols[FILE_DESCRIPTOR]) {
+		return scan_number (lexer, valid_symbols);
+	}
+	return false;
+}
--- a/shlr/radare2-shell-parser/src/tree_sitter/parser.h
+++ b/shlr/radare2-shell-parser/src/tree_sitter/parser.h
@ -0,0 +1,223 @@
+#ifndef TREE_SITTER_PARSER_H_
+#define TREE_SITTER_PARSER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#define ts_builtin_sym_error ((TSSymbol)-1)
+#define ts_builtin_sym_end 0
+#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
+
+#ifndef TREE_SITTER_API_H_
+typedef uint16_t TSSymbol;
+typedef uint16_t TSFieldId;
+typedef struct TSLanguage TSLanguage;
+#endif
+
+typedef struct {  // element type of TSLanguage.field_map_entries
+  TSFieldId field_id;  // id of the field this entry describes
+  uint8_t child_index;  // presumably the position of the child carrying the field - TODO confirm
+  bool inherited;  // NOTE(review): meaning not visible in this header - confirm against the runtime
+} TSFieldMapEntry;
+
+typedef struct {  // element type of TSLanguage.field_map_slices
+  uint16_t index;  // presumably the first entry of the slice in field_map_entries - TODO confirm
+  uint16_t length;  // presumably the number of entries in the slice - TODO confirm
+} TSFieldMapSlice;
+
+typedef uint16_t TSStateId;
+
+typedef struct {  // per-symbol flags; element type of TSLanguage.symbol_metadata
+  bool visible : 1;  // one-bit flag; presumably whether the symbol appears in the tree - TODO confirm
+  bool named : 1;  // one-bit flag; presumably marks named (vs anonymous) node types - TODO confirm
+} TSSymbolMetadata;
+
+typedef struct TSLexer TSLexer;
+
+struct TSLexer {  // lexer interface taken by TSLanguage.lex_fn and external_scanner.scan
+  int32_t lookahead;  // current input character; START_LEXER reloads it after every advance
+  TSSymbol result_symbol;  // written by ACCEPT_TOKEN with the recognised token
+  void (*advance)(TSLexer *, bool);  // move to the next character; START_LEXER passes its `skip` flag as the bool
+  void (*mark_end)(TSLexer *);  // called by ACCEPT_TOKEN right after result_symbol is set
+  uint32_t (*get_column)(TSLexer *);  // presumably returns the current column - TODO confirm
+  bool (*is_at_included_range_start)(const TSLexer *);  // NOTE(review): not used anywhere in this header - confirm semantics
+  bool (*eof)(const TSLexer *);  // presumably true at end of input - TODO confirm
+};
+
+typedef enum {  // discriminator stored in TSParseAction.type
+  TSParseActionTypeShift,  // emitted by SHIFT, SHIFT_REPEAT and SHIFT_EXTRA
+  TSParseActionTypeReduce,  // emitted by REDUCE
+  TSParseActionTypeAccept,  // emitted by ACCEPT_INPUT
+  TSParseActionTypeRecover,  // emitted by RECOVER
+} TSParseActionType;
+
+typedef struct {  // one parse action: a type tag plus a union of per-type parameters
+  union {
+    struct {  // fields initialised by SHIFT, SHIFT_REPEAT and SHIFT_EXTRA
+      TSStateId state;
+      bool extra : 1;
+      bool repetition : 1;
+    };
+    struct {  // fields initialised by REDUCE (symbol, child_count, plus its variadic extras)
+      TSSymbol symbol;
+      int16_t dynamic_precedence;
+      uint8_t child_count;
+      uint8_t production_id;
+    };
+  } params;
+  TSParseActionType type : 4;  // stored in a 4-bit bitfield
+} TSParseAction;
+
+typedef struct {  // element type of TSLanguage.lex_modes
+  uint16_t lex_state;  // presumably the start state handed to lex_fn - TODO confirm
+  uint16_t external_lex_state;  // presumably selects the external scanner's valid-symbol set - TODO confirm
+} TSLexMode;
+
+typedef union {  // element type of TSLanguage.parse_actions: either an action or a {count, reusable} record
+  TSParseAction action;
+  struct {
+    uint8_t count;  // presumably the number of actions that follow - TODO confirm
+    bool reusable : 1;  // NOTE(review): semantics not visible in this header - confirm
+  };
+} TSParseActionEntry;
+
+struct TSLanguage {  // static tables and hooks describing one generated grammar
+  uint32_t version;  // presumably the ABI version that ts_language_version reports - TODO confirm
+  uint32_t symbol_count;
+  uint32_t alias_count;
+  uint32_t token_count;
+  uint32_t external_token_count;
+  const char **symbol_names;
+  const TSSymbolMetadata *symbol_metadata;
+  const uint16_t *parse_table;
+  const TSParseActionEntry *parse_actions;
+  const TSLexMode *lex_modes;
+  const TSSymbol *alias_sequences;
+  uint16_t max_alias_sequence_length;
+  bool (*lex_fn)(TSLexer *, TSStateId);  // lexer entry point: takes the lexer and a state id
+  bool (*keyword_lex_fn)(TSLexer *, TSStateId);  // same signature as lex_fn; presumably tied to keyword_capture_token - TODO confirm
+  TSSymbol keyword_capture_token;
+  struct {  // tables and callbacks of the external scanner
+    const bool *states;
+    const TSSymbol *symbol_map;
+    void *(*create)(void);  // presumably returns the payload given to the other callbacks - TODO confirm
+    void (*destroy)(void *);
+    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);  // payload, lexer, per-symbol validity flags
+    unsigned (*serialize)(void *, char *);  // NOTE(review): buffer is presumably TREE_SITTER_SERIALIZATION_BUFFER_SIZE bytes - confirm
+    void (*deserialize)(void *, const char *, unsigned);
+  } external_scanner;
+  uint32_t field_count;
+  const TSFieldMapSlice *field_map_slices;
+  const TSFieldMapEntry *field_map_entries;
+  const char **field_names;
+  uint32_t large_state_count;
+  const uint16_t *small_parse_table;
+  const uint32_t *small_parse_table_map;
+  const TSSymbol *public_symbol_map;
+};
+
+/*
+ *  Lexer Macros
+ */
+
+#define START_LEXER()           \
+  bool result = false;          \
+  bool skip = false;            \
+  bool eof = false;             \
+  int32_t lookahead;            \
+  goto start;                   \
+  next_state:                   \
+  lexer->advance(lexer, skip);  \
+  start:                        \
+  skip = false;                 \
+  lookahead = lexer->lookahead; /* prologue: declares the locals; `next_state` advances the lexer, `start` resets skip and reloads lookahead */
+
+#define ADVANCE(state_value) \
+  {                          \
+    state = state_value;     \
+    goto next_state;         \
+  } /* store the new state, then jump to next_state, which advances the lexer */
+
+#define SKIP(state_value) \
+  {                       \
+    skip = true;          \
+    state = state_value;  \
+    goto next_state;      \
+  } /* like ADVANCE, but the advance is performed with skip = true */
+
+#define ACCEPT_TOKEN(symbol_value)     \
+  result = true;                       \
+  lexer->result_symbol = symbol_value; \
+  lexer->mark_end(lexer); /* expands to three separate statements (not wrapped in a block) */
+
+#define END_STATE() return result; /* result is true only after an ACCEPT_TOKEN */
+
+/*
+ *  Parse Table Macros
+ */
+
+/* Fully parenthesised so the expansion stays a single operand inside larger
+ * expressions (e.g. `2 * SMALL_STATE(x)`) and for arguments such as `a | b`. */
+#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
+
+#define STATE(id) id /* identity macro */
+
+#define ACTIONS(id) id /* identity macro */
+
+#define SHIFT(state_value)              \
+  {                                     \
+    {                                   \
+      .type = TSParseActionTypeShift,   \
+      .params = {.state = state_value}, \
+    }                                   \
+  } /* brace-initialiser for a shift action to state_value */
+
+#define SHIFT_REPEAT(state_value)     \
+  {                                   \
+    {                                 \
+      .type = TSParseActionTypeShift, \
+      .params = {                     \
+        .state = state_value,         \
+        .repetition = true            \
+      },                              \
+    }                                 \
+  } /* shift action with the repetition bit set */
+
+#define RECOVER()                        \
+  {                                      \
+    { .type = TSParseActionTypeRecover } \
+  } /* recover action; no parameters */
+
+#define SHIFT_EXTRA()                 \
+  {                                   \
+    {                                 \
+      .type = TSParseActionTypeShift, \
+      .params = {.extra = true}       \
+    }                                 \
+  } /* shift action with only the extra bit set */
+
+#define REDUCE(symbol_val, child_count_val, ...) \
+  {                                              \
+    {                                            \
+      .type = TSParseActionTypeReduce,           \
+      .params = {                                \
+        .symbol = symbol_val,                    \
+        .child_count = child_count_val,          \
+        __VA_ARGS__                              \
+      }                                          \
+    }                                            \
+  } /* reduce action; the variadic part supplies further designated params initialisers */
+
+#define ACCEPT_INPUT()                  \
+  {                                     \
+    { .type = TSParseActionTypeAccept } \
+  } /* accept action; no parameters */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_PARSER_H_
--- a/shlr/tree-sitter/lib/include/tree_sitter/api.h
+++ b/shlr/tree-sitter/lib/include/tree_sitter/api.h
@ -0,0 +1,876 @@
+#ifndef TREE_SITTER_API_H_
+#define TREE_SITTER_API_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+/****************************/
+/* Section - ABI Versioning */
+/****************************/
+
+/**
+ * The latest ABI version that is supported by the current version of the
+ * library. When Languages are generated by the Tree-sitter CLI, they are
+ * assigned an ABI version number that corresponds to the current CLI version.
+ * The Tree-sitter library is generally backwards-compatible with languages
+ * generated using older CLI versions, but is not forwards-compatible.
+ */
+#define TREE_SITTER_LANGUAGE_VERSION 11
+
+/**
+ * The earliest ABI version that is supported by the current version of the
+ * library.
+ */
+#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 9
+
+/*******************/
+/* Section - Types */
+/*******************/
+
+typedef uint16_t TSSymbol;
+typedef uint16_t TSFieldId;
+typedef struct TSLanguage TSLanguage;
+typedef struct TSParser TSParser;
+typedef struct TSTree TSTree;
+typedef struct TSQuery TSQuery;
+typedef struct TSQueryCursor TSQueryCursor;
+
+typedef enum {  // text encodings; used by TSInput.encoding and ts_parser_parse_string_encoding
+  TSInputEncodingUTF8,
+  TSInputEncodingUTF16,
+} TSInputEncoding;
+
+typedef enum {  // result type of ts_language_symbol_type
+  TSSymbolTypeRegular,  // presumably a named node type - TODO confirm
+  TSSymbolTypeAnonymous,  // presumably an anonymous node type - TODO confirm
+  TSSymbolTypeAuxiliary,  // presumably a hidden node type - TODO confirm
+} TSSymbolType;
+
+typedef struct {  // a (row, column) position
+  uint32_t row;
+  uint32_t column;
+} TSPoint;
+
+typedef struct {  // a span given both as points and as byte offsets; see ts_parser_set_included_ranges
+  TSPoint start_point;
+  TSPoint end_point;
+  uint32_t start_byte;
+  uint32_t end_byte;  // ts_parser_set_included_ranges requires ranges[i].end_byte <= ranges[i + 1].start_byte
+} TSRange;
+
+typedef struct {  // text source for ts_parser_parse (documented in detail on that function)
+  void *payload;  // arbitrary pointer passed to every invocation of `read`
+  const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read);  // returns a chunk of text; writes its length (0 = end of document) to bytes_read
+  TSInputEncoding encoding;  // how the text is encoded
+} TSInput;
+
+typedef enum {  // second argument of the TSLogger.log callback
+  TSLogTypeParse,
+  TSLogTypeLex,
+} TSLogType;
+
+typedef struct {  // logger installed with ts_parser_set_logger
+  void *payload;  // passed back to `log`; the parser does not take ownership of it
+  void (*log)(void *payload, TSLogType, const char *);
+} TSLogger;
+
+typedef struct {  // one source edit, in byte offsets and (row, column) points; used by ts_tree_edit and ts_node_edit
+  uint32_t start_byte;
+  uint32_t old_end_byte;
+  uint32_t new_end_byte;
+  TSPoint start_point;
+  TSPoint old_end_point;
+  TSPoint new_end_point;
+} TSInputEdit;
+
+typedef struct {  // syntax node handle, passed by value to the ts_node_* functions
+  uint32_t context[4];  // NOTE(review): contents not documented in this header - treat as opaque
+  const void *id;
+  const TSTree *tree;
+} TSNode;
+
+typedef struct {  // cursor created by ts_tree_cursor_new and released with ts_tree_cursor_delete
+  const void *tree;
+  const void *id;
+  uint32_t context[2];  // NOTE(review): contents not documented in this header - treat as opaque
+} TSTreeCursor;
+
+typedef struct {  // element type of TSQueryMatch.captures
+  TSNode node;
+  uint32_t index;  // presumably the capture id accepted by ts_query_capture_name_for_id - TODO confirm
+} TSQueryCapture;
+
+typedef struct {  // result written by ts_query_cursor_next_match / ts_query_cursor_next_capture
+  uint32_t id;  // presumably the id accepted by ts_query_cursor_remove_match - TODO confirm
+  uint16_t pattern_index;  // index of the pattern that matched
+  uint16_t capture_count;  // presumably the number of elements in `captures` - TODO confirm
+  const TSQueryCapture *captures;
+} TSQueryMatch;
+
+typedef enum {  // legal values of TSQueryPredicateStep.type; see ts_query_predicates_for_pattern
+  TSQueryPredicateStepTypeDone,  // sentinel marking the end of an individual predicate
+  TSQueryPredicateStepTypeCapture,  // step names a capture
+  TSQueryPredicateStepTypeString,  // step is a literal string
+} TSQueryPredicateStepType;
+
+typedef struct {  // element of the array returned by ts_query_predicates_for_pattern
+  TSQueryPredicateStepType type;
+  uint32_t value_id;  // for capture/string steps: id for ts_query_capture_name_for_id / ts_query_string_value_for_id
+} TSQueryPredicateStep;
+
+typedef enum {  // error kind written to the `error_type` parameter of ts_query_new
+  TSQueryErrorNone = 0,
+  TSQueryErrorSyntax,
+  TSQueryErrorNodeType,
+  TSQueryErrorField,
+  TSQueryErrorCapture,
+} TSQueryError;
+
+/********************/
+/* Section - Parser */
+/********************/
+
+/**
+ * Create a new parser.
+ */
+TSParser *ts_parser_new(void);
+
+/**
+ * Delete the parser, freeing all of the memory that it used.
+ */
+void ts_parser_delete(TSParser *parser);
+
+/**
+ * Set the language that the parser should use for parsing.
+ *
+ * Returns a boolean indicating whether or not the language was successfully
+ * assigned. True means assignment succeeded. False means there was a version
+ * mismatch: the language was generated with an incompatible version of the
+ * Tree-sitter CLI. Check the language's version using `ts_language_version`
+ * and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and
+ * `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants.
+ */
+bool ts_parser_set_language(TSParser *self, const TSLanguage *language);
+
+/**
+ * Get the parser's current language.
+ */
+const TSLanguage *ts_parser_language(const TSParser *self);
+
+/**
+ * Set the ranges of text that the parser should include when parsing.
+ *
+ * By default, the parser will always include entire documents. This function
+ * allows you to parse only a *portion* of a document but still return a syntax
+ * tree whose ranges match up with the document as a whole. You can also pass
+ * multiple disjoint ranges.
+ *
+ * The second and third parameters specify the location and length of an array
+ * of ranges. The parser does *not* take ownership of these ranges; it copies
+ * the data, so it doesn't matter how these ranges are allocated.
+ *
+ * If `length` is zero, then the entire document will be parsed. Otherwise,
+ * the given ranges must be ordered from earliest to latest in the document,
+ * and they must not overlap. That is, the following must hold for all
+ * `i` < `length - 1`:
+ *
+ *     ranges[i].end_byte <= ranges[i + 1].start_byte
+ *
+ * If this requirement is not satisfied, the operation will fail, the ranges
+ * will not be assigned, and this function will return `false`. On success,
+ * this function returns `true`.
+ */
+bool ts_parser_set_included_ranges(
+  TSParser *self,
+  const TSRange *ranges,
+  uint32_t length
+);
+
+/**
+ * Get the ranges of text that the parser will include when parsing.
+ *
+ * The returned pointer is owned by the parser. The caller should not free it
+ * or write to it. The length of the array will be written to the given
+ * `length` pointer.
+ */
+const TSRange *ts_parser_included_ranges(
+  const TSParser *self,
+  uint32_t *length
+);
+
+/**
+ * Use the parser to parse some source code and create a syntax tree.
+ *
+ * If you are parsing this document for the first time, pass `NULL` for the
+ * `old_tree` parameter. Otherwise, if you have already parsed an earlier
+ * version of this document and the document has since been edited, pass the
+ * previous syntax tree so that the unchanged parts of it can be reused.
+ * This will save time and memory. For this to work correctly, you must have
+ * already edited the old syntax tree using the `ts_tree_edit` function in a
+ * way that exactly matches the source code changes.
+ *
+ * The `TSInput` parameter lets you specify how to read the text. It has the
+ * following three fields:
+ * 1. `read`: A function to retrieve a chunk of text at a given byte offset
+ *    and (row, column) position. The function should return a pointer to the
+ *    text and write its length to the `bytes_read` pointer. The parser
+ *    does not take ownership of this buffer; it just borrows it until it has
+ *    finished reading it. The function should write a zero value to the
+ *    `bytes_read` pointer to indicate the end of the document.
+ * 2. `payload`: An arbitrary pointer that will be passed to each invocation
+ *    of the `read` function.
+ * 3. `encoding`: An indication of how the text is encoded. Either
+ *    `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.
+ *
+ * This function returns a syntax tree on success, and `NULL` on failure. There
+ * are three possible reasons for failure:
+ * 1. The parser does not have a language assigned. Check for this using the
+ *    `ts_parser_language` function.
+ * 2. Parsing was cancelled due to a timeout that was set by an earlier call to
+ *    the `ts_parser_set_timeout_micros` function. You can resume parsing from
+ *    where the parser left off by calling `ts_parser_parse` again with the
+ *    same arguments. Or you can start parsing from scratch by first calling
+ *    `ts_parser_reset`.
+ * 3. Parsing was cancelled using a cancellation flag that was set by an
+ *    earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing
+ *    from where the parser left off by calling `ts_parser_parse` again with
+ *    the same arguments.
+ */
+TSTree *ts_parser_parse(
+  TSParser *self,
+  const TSTree *old_tree,
+  TSInput input
+);
+
+/**
+ * Use the parser to parse some source code stored in one contiguous buffer.
+ * The first two parameters are the same as in the `ts_parser_parse` function
+ * above. The second two parameters indicate the location of the buffer and its
+ * length in bytes.
+ */
+TSTree *ts_parser_parse_string(
+  TSParser *self,
+  const TSTree *old_tree,
+  const char *string,
+  uint32_t length
+);
+
+/**
+ * Use the parser to parse some source code stored in one contiguous buffer with
+ * a given encoding. The first four parameters work the same as in the
+ * `ts_parser_parse_string` method above. The final parameter indicates whether
+ * the text is encoded as UTF8 or UTF16.
+ */
+TSTree *ts_parser_parse_string_encoding(
+  TSParser *self,
+  const TSTree *old_tree,
+  const char *string,
+  uint32_t length,
+  TSInputEncoding encoding
+);
+
+/**
+ * Instruct the parser to start the next parse from the beginning.
+ *
+ * If the parser previously failed because of a timeout or a cancellation, then
+ * by default, it will resume where it left off on the next call to
+ * `ts_parser_parse` or other parsing functions. If you don't want to resume,
+ * and instead intend to use this parser to parse some other document, you must
+ * call `ts_parser_reset` first.
+ */
+void ts_parser_reset(TSParser *self);
+
+/**
+ * Set the maximum duration in microseconds that parsing should be allowed to
+ * take before halting.
+ *
+ * If parsing takes longer than this, it will halt early, returning NULL.
+ * See `ts_parser_parse` for more information.
+ */
+void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout);
+
+/**
+ * Get the duration in microseconds that parsing is allowed to take.
+ */
+uint64_t ts_parser_timeout_micros(const TSParser *self);
+
+/**
+ * Set the parser's current cancellation flag pointer.
+ *
+ * If a non-null pointer is assigned, then the parser will periodically read
+ * from this pointer during parsing. If it reads a non-zero value, it will
+ * halt early, returning NULL. See `ts_parser_parse` for more information.
+ */
+void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag);
+
+/**
+ * Get the parser's current cancellation flag pointer.
+ */
+const size_t *ts_parser_cancellation_flag(const TSParser *self);
+
+/**
+ * Set the logger that a parser should use during parsing.
+ *
+ * The parser does not take ownership over the logger payload. If a logger was
+ * previously assigned, the caller is responsible for releasing any memory
+ * owned by the previous logger.
+ */
+void ts_parser_set_logger(TSParser *self, TSLogger logger);
+
+/**
+ * Get the parser's current logger.
+ */
+TSLogger ts_parser_logger(const TSParser *self);
+
+/**
+ * Set the file descriptor to which the parser should write debugging graphs
+ * during parsing. The graphs are formatted in the DOT language. You may want
+ * to pipe these graphs directly to a `dot(1)` process in order to generate
+ * SVG output. You can turn off this logging by passing a negative number.
+ */
+void ts_parser_print_dot_graphs(TSParser *self, int file);
+
+/******************/
+/* Section - Tree */
+/******************/
+
+/**
+ * Create a shallow copy of the syntax tree. This is very fast.
+ *
+ * You need to copy a syntax tree in order to use it on more than one thread at
+ * a time, as syntax trees are not thread safe.
+ */
+TSTree *ts_tree_copy(const TSTree *self);
+
+/**
+ * Delete the syntax tree, freeing all of the memory that it used.
+ */
+void ts_tree_delete(TSTree *self);
+
+/**
+ * Get the root node of the syntax tree.
+ */
+TSNode ts_tree_root_node(const TSTree *self);
+
+/**
+ * Get the language that was used to parse the syntax tree.
+ */
+const TSLanguage *ts_tree_language(const TSTree *);
+
+/**
+ * Edit the syntax tree to keep it in sync with source code that has been
+ * edited.
+ *
+ * You must describe the edit both in terms of byte offsets and in terms of
+ * (row, column) coordinates.
+ */
+void ts_tree_edit(TSTree *self, const TSInputEdit *edit);
+
+/**
+ * Compare an old edited syntax tree to a new syntax tree representing the same
+ * document, returning an array of ranges whose syntactic structure has changed.
+ *
+ * For this to work correctly, the old syntax tree must have been edited such
+ * that its ranges match up to the new tree. Generally, you'll want to call
+ * this function right after calling one of the `ts_parser_parse` functions.
+ * You need to pass the old tree that was passed to parse, as well as the new
+ * tree that was returned from that function.
+ *
+ * The returned array is allocated using `malloc` and the caller is responsible
+ * for freeing it using `free`. The length of the array will be written to the
+ * given `length` pointer.
+ */
+TSRange *ts_tree_get_changed_ranges(
+  const TSTree *old_tree,
+  const TSTree *new_tree,
+  uint32_t *length
+);
+
+/**
+ * Write a DOT graph describing the syntax tree to the given file.
+ */
+void ts_tree_print_dot_graph(const TSTree *, FILE *);
+
+/******************/
+/* Section - Node */
+/******************/
+
+/**
+ * Get the node's type as a null-terminated string.
+ */
+const char *ts_node_type(TSNode);
+
+/**
+ * Get the node's type as a numerical id.
+ */
+TSSymbol ts_node_symbol(TSNode);
+
+/**
+ * Get the node's start byte.
+ */
+uint32_t ts_node_start_byte(TSNode);
+
+/**
+ * Get the node's start position in terms of rows and columns.
+ */
+TSPoint ts_node_start_point(TSNode);
+
+/**
+ * Get the node's end byte.
+ */
+uint32_t ts_node_end_byte(TSNode);
+
+/**
+ * Get the node's end position in terms of rows and columns.
+ */
+TSPoint ts_node_end_point(TSNode);
+
+/**
+ * Get an S-expression representing the node as a string.
+ *
+ * This string is allocated with `malloc` and the caller is responsible for
+ * freeing it using `free`.
+ */
+char *ts_node_string(TSNode);
+
+/**
+ * Check if the node is null. Functions like `ts_node_child` and
+ * `ts_node_next_sibling` will return a null node to indicate that no such node
+ * was found.
+ */
+bool ts_node_is_null(TSNode);
+
+/**
+ * Check if the node is *named*. Named nodes correspond to named rules in the
+ * grammar, whereas *anonymous* nodes correspond to string literals in the
+ * grammar.
+ */
+bool ts_node_is_named(TSNode);
+
+/**
+ * Check if the node is *missing*. Missing nodes are inserted by the parser in
+ * order to recover from certain kinds of syntax errors.
+ */
+bool ts_node_is_missing(TSNode);
+
+/**
+ * Check if the node is *extra*. Extra nodes represent things like comments,
+ * which are not required by the grammar, but can appear anywhere.
+ */
+bool ts_node_is_extra(TSNode);
+
+/**
+ * Check if a syntax node has been edited.
+ */
+bool ts_node_has_changes(TSNode);
+
+/**
+ * Check if the node is a syntax error or contains any syntax errors.
+ */
+bool ts_node_has_error(TSNode);
+
+/**
+ * Get the node's immediate parent.
+ */
+TSNode ts_node_parent(TSNode);
+
+/**
+ * Get the node's child at the given index, where zero represents the first
+ * child.
+ */
+TSNode ts_node_child(TSNode, uint32_t);
+
+/**
+ * Get the node's number of children.
+ */
+uint32_t ts_node_child_count(TSNode);
+
+/**
+ * Get the node's *named* child at the given index.
+ *
+ * See also `ts_node_is_named`.
+ */
+TSNode ts_node_named_child(TSNode, uint32_t);
+
+/**
+ * Get the node's number of *named* children.
+ *
+ * See also `ts_node_is_named`.
+ */
+uint32_t ts_node_named_child_count(TSNode);
+
+/**
+ * Get the node's child with the given field name.
+ */
+TSNode ts_node_child_by_field_name(
+  TSNode self,
+  const char *field_name,
+  uint32_t field_name_length
+);
+
+/**
+ * Get the node's child with the given numerical field id.
+ *
+ * You can convert a field name to an id using the
+ * `ts_language_field_id_for_name` function.
+ */
+TSNode ts_node_child_by_field_id(TSNode, TSFieldId);
+
+/**
+ * Get the node's next / previous sibling.
+ */
+TSNode ts_node_next_sibling(TSNode);
+TSNode ts_node_prev_sibling(TSNode);
+
+/**
+ * Get the node's next / previous *named* sibling.
+ */
+TSNode ts_node_next_named_sibling(TSNode);
+TSNode ts_node_prev_named_sibling(TSNode);
+
+/**
+ * Get the node's first child that extends beyond the given byte offset.
+ */
+TSNode ts_node_first_child_for_byte(TSNode, uint32_t);
+
+/**
+ * Get the node's first named child that extends beyond the given byte offset.
+ */
+TSNode ts_node_first_named_child_for_byte(TSNode, uint32_t);
+
+/**
+ * Get the smallest node within this node that spans the given range of bytes
+ * or (row, column) positions.
+ */
+TSNode ts_node_descendant_for_byte_range(TSNode, uint32_t, uint32_t);
+TSNode ts_node_descendant_for_point_range(TSNode, TSPoint, TSPoint);
+
+/**
+ * Get the smallest named node within this node that spans the given range of
+ * bytes or (row, column) positions.
+ */
+TSNode ts_node_named_descendant_for_byte_range(TSNode, uint32_t, uint32_t);
+TSNode ts_node_named_descendant_for_point_range(TSNode, TSPoint, TSPoint);
+
+/**
+ * Edit the node to keep it in-sync with source code that has been edited.
+ *
+ * This function is only rarely needed. When you edit a syntax tree with the
+ * `ts_tree_edit` function, all of the nodes that you retrieve from the tree
+ * afterward will already reflect the edit. You only need to use `ts_node_edit`
+ * when you have a `TSNode` instance that you want to keep and continue to use
+ * after an edit.
+ */
+void ts_node_edit(TSNode *, const TSInputEdit *);
+
+/**
+ * Check if two nodes are identical.
+ */
+bool ts_node_eq(TSNode, TSNode);
+
+/************************/
+/* Section - TreeCursor */
+/************************/
+
+/**
+ * Create a new tree cursor starting from the given node.
+ *
+ * A tree cursor allows you to walk a syntax tree more efficiently than is
+ * possible using the `TSNode` functions. It is a mutable object that is always
+ * on a certain syntax node, and can be moved imperatively to different nodes.
+ */
+TSTreeCursor ts_tree_cursor_new(TSNode);
+
+/**
+ * Delete a tree cursor, freeing all of the memory that it used.
+ */
+void ts_tree_cursor_delete(TSTreeCursor *);
+
+/**
+ * Re-initialize a tree cursor to start at a different node.
+ */
+void ts_tree_cursor_reset(TSTreeCursor *, TSNode);
+
+/**
+ * Get the tree cursor's current node.
+ */
+TSNode ts_tree_cursor_current_node(const TSTreeCursor *);
+
+/**
+ * Get the field name of the tree cursor's current node.
+ *
+ * This returns `NULL` if the current node doesn't have a field.
+ * See also `ts_node_child_by_field_name`.
+ */
+const char *ts_tree_cursor_current_field_name(const TSTreeCursor *);
+
+/**
+ * Get the field id of the tree cursor's current node.
+ *
+ * This returns zero if the current node doesn't have a field.
+ * See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`.
+ */
+TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *);
+
+/**
+ * Move the cursor to the parent of its current node.
+ *
+ * This returns `true` if the cursor successfully moved, and returns `false`
+ * if there was no parent node (the cursor was already on the root node).
+ */
+bool ts_tree_cursor_goto_parent(TSTreeCursor *);
+
+/**
+ * Move the cursor to the next sibling of its current node.
+ *
+ * This returns `true` if the cursor successfully moved, and returns `false`
+ * if there was no next sibling node.
+ */
+bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *);
+
+/**
+ * Move the cursor to the first child of its current node.
+ *
+ * This returns `true` if the cursor successfully moved, and returns `false`
+ * if there were no children.
+ */
+bool ts_tree_cursor_goto_first_child(TSTreeCursor *);
+
+/**
+ * Move the cursor to the first child of its current node that extends beyond
+ * the given byte offset.
+ *
+ * This returns the index of the child node if one was found, and returns -1
+ * if no such child was found.
+ */
+int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *, uint32_t);
+
+TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *);
+
+/*******************/
+/* Section - Query */
+/*******************/
+
+/**
+ * Create a new query from a string containing one or more S-expression
+ * patterns. The query is associated with a particular language, and can
+ * only be run on syntax nodes parsed with that language.
+ *
+ * If all of the given patterns are valid, this returns a `TSQuery`.
+ * If a pattern is invalid, this returns `NULL`, and provides two pieces
+ * of information about the problem:
+ * 1. The byte offset of the error is written to the `error_offset` parameter.
+ * 2. The type of error is written to the `error_type` parameter.
+ */
+TSQuery *ts_query_new(
+  const TSLanguage *language,
+  const char *source,
+  uint32_t source_len,
+  uint32_t *error_offset,
+  TSQueryError *error_type
+);
+
+/**
+ * Delete a query, freeing all of the memory that it used.
+ */
+void ts_query_delete(TSQuery *);
+
+/**
+ * Get the number of patterns, captures, or string literals in the query.
+ */
+uint32_t ts_query_pattern_count(const TSQuery *);
+uint32_t ts_query_capture_count(const TSQuery *);
+uint32_t ts_query_string_count(const TSQuery *);
+
+/**
+ * Get the byte offset where the given pattern starts in the query's source.
+ *
+ * This can be useful when combining queries by concatenating their source
+ * code strings.
+ */
+uint32_t ts_query_start_byte_for_pattern(const TSQuery *, uint32_t);
+
+/**
+ * Get all of the predicates for the given pattern in the query.
+ *
+ * The predicates are represented as a single array of steps. There are three
+ * types of steps in this array, which correspond to the three legal values for
+ * the `type` field:
+ * - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names
+ *    of captures. Their `value_id` can be used with the
+ *   `ts_query_capture_name_for_id` function to obtain the name of the capture.
+ * - `TSQueryPredicateStepTypeString` - Steps with this type represent literal
+ *    strings. Their `value_id` can be used with the
+ *    `ts_query_string_value_for_id` function to obtain their string value.
+ * - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*
+ *    that represent the end of an individual predicate. If a pattern has two
+ *    predicates, then there will be two steps with this `type` in the array.
+ */
+const TSQueryPredicateStep *ts_query_predicates_for_pattern(
+  const TSQuery *self,
+  uint32_t pattern_index,
+  uint32_t *length
+);
+
+/**
+ * Get the name and length of one of the query's captures, or one of the
+ * query's string literals. Each capture and string is associated with a
+ * numeric id based on the order that it appeared in the query's source.
+ */
+const char *ts_query_capture_name_for_id(
+  const TSQuery *,
+  uint32_t id,
+  uint32_t *length
+);
+const char *ts_query_string_value_for_id(
+  const TSQuery *,
+  uint32_t id,
+  uint32_t *length
+);
+
+/**
+ * Disable a certain capture within a query.
+ *
+ * This prevents the capture from being returned in matches, and also avoids
+ * any resource usage associated with recording the capture. Currently, there
+ * is no way to undo this.
+ */
+void ts_query_disable_capture(TSQuery *, const char *, uint32_t);
+
+/**
+ * Disable a certain pattern within a query.
+ *
+ * This prevents the pattern from matching and removes most of the overhead
+ * associated with the pattern. Currently, there is no way to undo this.
+ */
+void ts_query_disable_pattern(TSQuery *, uint32_t);
+
+/**
+ * Create a new cursor for executing a given query.
+ *
+ * The cursor stores the state that is needed to iteratively search
+ * for matches. To use the query cursor, first call `ts_query_cursor_exec`
+ * to start running a given query on a given syntax node. Then, there are
+ * two options for consuming the results of the query:
+ * 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the
+ *    *matches* in the order that they were found. Each match contains the
+ *    index of the pattern that matched, and an array of captures. Because
+ *    multiple patterns can match the same set of nodes, one match may contain
+ *    captures that appear *before* some of the captures from a previous match.
+ * 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the
+ *    individual *captures* in the order that they appear. This is useful if
+ *    you don't care about which pattern matched, and just want a single ordered
+ *    sequence of captures.
+ *
+ * If you don't care about consuming all of the results, you can stop calling
+ * `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point.
+ *  You can then start executing another query on another node by calling
+ *  `ts_query_cursor_exec` again.
+ */
+TSQueryCursor *ts_query_cursor_new(void);
+
+/**
+ * Delete a query cursor, freeing all of the memory that it used.
+ */
+void ts_query_cursor_delete(TSQueryCursor *);
+
+/**
+ * Start running a given query on a given node.
+ */
+void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode);
+
+/**
+ * Set the range of bytes or (row, column) positions in which the query
+ * will be executed.
+ */
+void ts_query_cursor_set_byte_range(TSQueryCursor *, uint32_t, uint32_t);
+void ts_query_cursor_set_point_range(TSQueryCursor *, TSPoint, TSPoint);
+
+/**
+ * Advance to the next match of the currently running query.
+ *
+ * If there is a match, write it to `*match` and return `true`.
+ * Otherwise, return `false`.
+ */
+bool ts_query_cursor_next_match(TSQueryCursor *, TSQueryMatch *match);
+void ts_query_cursor_remove_match(TSQueryCursor *, uint32_t id);
+
+/**
+ * Advance to the next capture of the currently running query.
+ *
+ * If there is a capture, write its match to `*match` and its index within
+ * the match's capture list to `*capture_index`. Otherwise, return `false`.
+ */
+bool ts_query_cursor_next_capture(
+  TSQueryCursor *,
+  TSQueryMatch *match,
+  uint32_t *capture_index
+);
+
+/**********************/
+/* Section - Language */
+/**********************/
+
+/**
+ * Get the number of distinct node types in the language.
+ */
+uint32_t ts_language_symbol_count(const TSLanguage *);
+
+/**
+ * Get a node type string for the given numerical id.
+ */
+const char *ts_language_symbol_name(const TSLanguage *, TSSymbol);
+
+/**
+ * Get the numerical id for the given node type string.
+ */
+TSSymbol ts_language_symbol_for_name(
+  const TSLanguage *self,
+  const char *string,
+  uint32_t length,
+  bool is_named
+);
+
+/**
+ * Get the number of distinct field names in the language.
+ */
+uint32_t ts_language_field_count(const TSLanguage *);
+
+/**
+ * Get the field name string for the given numerical id.
+ */
+const char *ts_language_field_name_for_id(const TSLanguage *, TSFieldId);
+
+/**
+ * Get the numerical id for the given field name string.
+ */
+TSFieldId ts_language_field_id_for_name(const TSLanguage *, const char *, uint32_t);
+
+/**
+ * Check whether the given node type id belongs to named nodes, anonymous nodes,
+ * or hidden nodes.
+ *
+ * See also `ts_node_is_named`. Hidden nodes are never returned from the API.
+ */
+TSSymbolType ts_language_symbol_type(const TSLanguage *, TSSymbol);
+
+/**
+ * Get the ABI version number for this language. This version number is used
+ * to ensure that languages were generated by a compatible version of
+ * Tree-sitter.
+ *
+ * See also `ts_parser_set_language`.
+ */
+uint32_t ts_language_version(const TSLanguage *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_API_H_
--- a/shlr/tree-sitter/lib/include/tree_sitter/parser.h
+++ b/shlr/tree-sitter/lib/include/tree_sitter/parser.h
@ -0,0 +1,223 @@
+#ifndef TREE_SITTER_PARSER_H_
+#define TREE_SITTER_PARSER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#define ts_builtin_sym_error ((TSSymbol)-1)
+#define ts_builtin_sym_end 0
+#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
+
+#ifndef TREE_SITTER_API_H_
+typedef uint16_t TSSymbol;
+typedef uint16_t TSFieldId;
+typedef struct TSLanguage TSLanguage;
+#endif
+
+typedef struct {
+  TSFieldId field_id;
+  uint8_t child_index;
+  bool inherited;
+} TSFieldMapEntry;
+
+typedef struct {
+  uint16_t index;
+  uint16_t length;
+} TSFieldMapSlice;
+
+typedef uint16_t TSStateId;
+
+typedef struct {
+  bool visible : 1;
+  bool named : 1;
+} TSSymbolMetadata;
+
+typedef struct TSLexer TSLexer;
+
+struct TSLexer {
+  int32_t lookahead;
+  TSSymbol result_symbol;
+  void (*advance)(TSLexer *, bool);
+  void (*mark_end)(TSLexer *);
+  uint32_t (*get_column)(TSLexer *);
+  bool (*is_at_included_range_start)(const TSLexer *);
+  bool (*eof)(const TSLexer *);
+};
+
+typedef enum {
+  TSParseActionTypeShift,
+  TSParseActionTypeReduce,
+  TSParseActionTypeAccept,
+  TSParseActionTypeRecover,
+} TSParseActionType;
+
+typedef struct {
+  union {
+    struct {
+      TSStateId state;
+      bool extra : 1;
+      bool repetition : 1;
+    };
+    struct {
+      TSSymbol symbol;
+      int16_t dynamic_precedence;
+      uint8_t child_count;
+      uint8_t production_id;
+    };
+  } params;
+  TSParseActionType type : 4;
+} TSParseAction;
+
+typedef struct {
+  uint16_t lex_state;
+  uint16_t external_lex_state;
+} TSLexMode;
+
+typedef union {
+  TSParseAction action;
+  struct {
+    uint8_t count;
+    bool reusable : 1;
+  };
+} TSParseActionEntry;
+
+struct TSLanguage {
+  uint32_t version;
+  uint32_t symbol_count;
+  uint32_t alias_count;
+  uint32_t token_count;
+  uint32_t external_token_count;
+  const char **symbol_names;
+  const TSSymbolMetadata *symbol_metadata;
+  const uint16_t *parse_table;
+  const TSParseActionEntry *parse_actions;
+  const TSLexMode *lex_modes;
+  const TSSymbol *alias_sequences;
+  uint16_t max_alias_sequence_length;
+  bool (*lex_fn)(TSLexer *, TSStateId);
+  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
+  TSSymbol keyword_capture_token;
+  struct {
+    const bool *states;
+    const TSSymbol *symbol_map;
+    void *(*create)(void);
+    void (*destroy)(void *);
+    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
+    unsigned (*serialize)(void *, char *);
+    void (*deserialize)(void *, const char *, unsigned);
+  } external_scanner;
+  uint32_t field_count;
+  const TSFieldMapSlice *field_map_slices;
+  const TSFieldMapEntry *field_map_entries;
+  const char **field_names;
+  uint32_t large_state_count;
+  const uint16_t *small_parse_table;
+  const uint32_t *small_parse_table_map;
+  const TSSymbol *public_symbol_map;
+};
+
+/*
+ *  Lexer Macros
+ */
+
+#define START_LEXER()           \
+  bool result = false;          \
+  bool skip = false;            \
+  bool eof = false;             \
+  int32_t lookahead;            \
+  goto start;                   \
+  next_state:                   \
+  lexer->advance(lexer, skip);  \
+  start:                        \
+  skip = false;                 \
+  lookahead = lexer->lookahead;
+
+#define ADVANCE(state_value) \
+  {                          \
+    state = state_value;     \
+    goto next_state;         \
+  }
+
+#define SKIP(state_value) \
+  {                       \
+    skip = true;          \
+    state = state_value;  \
+    goto next_state;      \
+  }
+
+#define ACCEPT_TOKEN(symbol_value)     \
+  result = true;                       \
+  lexer->result_symbol = symbol_value; \
+  lexer->mark_end(lexer);
+
+#define END_STATE() return result;
+
+/*
+ *  Parse Table Macros
+ */
+
+/*
+ * Expands to `id` minus LARGE_STATE_COUNT (which must be defined by the file
+ * that uses this macro). The expansion is fully parenthesized so that it
+ * remains a single operand when it appears inside a larger expression.
+ */
+#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
+
+#define STATE(id) id
+
+#define ACTIONS(id) id
+
+#define SHIFT(state_value)              \
+  {                                     \
+    {                                   \
+      .type = TSParseActionTypeShift,   \
+      .params = {.state = state_value}, \
+    }                                   \
+  }
+
+#define SHIFT_REPEAT(state_value)     \
+  {                                   \
+    {                                 \
+      .type = TSParseActionTypeShift, \
+      .params = {                     \
+        .state = state_value,         \
+        .repetition = true            \
+      },                              \
+    }                                 \
+  }
+
+#define RECOVER()                        \
+  {                                      \
+    { .type = TSParseActionTypeRecover } \
+  }
+
+#define SHIFT_EXTRA()                 \
+  {                                   \
+    {                                 \
+      .type = TSParseActionTypeShift, \
+      .params = {.extra = true}       \
+    }                                 \
+  }
+
+#define REDUCE(symbol_val, child_count_val, ...) \
+  {                                              \
+    {                                            \
+      .type = TSParseActionTypeReduce,           \
+      .params = {                                \
+        .symbol = symbol_val,                    \
+        .child_count = child_count_val,          \
+        __VA_ARGS__                              \
+      }                                          \
+    }                                            \
+  }
+
+#define ACCEPT_INPUT()                  \
+  {                                     \
+    { .type = TSParseActionTypeAccept } \
+  }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_PARSER_H_
--- a/shlr/tree-sitter/lib/src/alloc.h
+++ b/shlr/tree-sitter/lib/src/alloc.h
@ -0,0 +1,81 @@
+#ifndef TREE_SITTER_ALLOC_H_
+#define TREE_SITTER_ALLOC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#if defined(TREE_SITTER_TEST)
+
+void *ts_record_malloc(size_t);
+void *ts_record_calloc(size_t, size_t);
+void *ts_record_realloc(void *, size_t);
+void ts_record_free(void *);
+bool ts_toggle_allocation_recording(bool);
+
+static inline void *ts_malloc(size_t size) {
+  return ts_record_malloc(size);
+}
+
+static inline void *ts_calloc(size_t count, size_t size) {
+  return ts_record_calloc(count, size);
+}
+
+static inline void *ts_realloc(void *buffer, size_t size) {
+  return ts_record_realloc(buffer, size);
+}
+
+static inline void ts_free(void *buffer) {
+  ts_record_free(buffer);
+}
+
+#else
+
+#include <stdlib.h>
+
+static inline bool ts_toggle_allocation_recording(bool value) {
+  return false;
+}
+
+// Allocate `size` bytes with malloc(). When a non-empty request fails, the
+// requested size is reported on stderr and the process exits with status 1,
+// so callers never see NULL for size > 0.
+static inline void *ts_malloc(size_t size) {
+  void *result = malloc(size);
+  if (size > 0 && !result) {
+    // size_t must be formatted with %zu; %lu has the wrong width on LLP64
+    // targets such as 64-bit Windows.
+    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
+    exit(1);
+  }
+  return result;
+}
+
+// Allocate a zero-initialized buffer of `count` elements of `size` bytes with
+// calloc(). When a non-empty request fails, the size is reported on stderr
+// and the process exits with status 1.
+static inline void *ts_calloc(size_t count, size_t size) {
+  void *result = calloc(count, size);
+  // A NULL result is only an error when bytes were actually requested;
+  // calloc() may legitimately return NULL for a zero-sized request. This
+  // mirrors the `size > 0` check in ts_malloc and ts_realloc.
+  if (count > 0 && size > 0 && !result) {
+    // size_t must be formatted with %zu, not %lu.
+    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
+    exit(1);
+  }
+  return result;
+}
+
+// Resize `buffer` to `size` bytes with realloc(). When a non-empty request
+// fails, the size is reported on stderr and the process exits with status 1.
+static inline void *ts_realloc(void *buffer, size_t size) {
+  void *result = realloc(buffer, size);
+  if (size > 0 && !result) {
+    // size_t must be formatted with %zu, not %lu.
+    fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
+    exit(1);
+  }
+  return result;
+}
+
+static inline void ts_free(void *buffer) {
+  free(buffer);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_ALLOC_H_
--- a/shlr/tree-sitter/lib/src/array.h
+++ b/shlr/tree-sitter/lib/src/array.h
@ -0,0 +1,142 @@
+#ifndef TREE_SITTER_ARRAY_H_
+#define TREE_SITTER_ARRAY_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+#include <stdbool.h>
+#include "./alloc.h"
+
+#define Array(T)     \
+  struct {           \
+    T *contents;     \
+    uint32_t size;     \
+    uint32_t capacity; \
+  }
+
+#define array_init(self) \
+  ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
+
+#define array_new() \
+  { NULL, 0, 0 }
+
+// Yield a pointer to the element at `index`, asserting that it is in bounds.
+// `index` is parenthesized before the cast so that a compound argument
+// (for example `a ? b : c`) is converted as a whole rather than token-wise.
+#define array_get(self, index) \
+  (assert((uint32_t)(index) < (self)->size), &(self)->contents[index])
+
+#define array_front(self) array_get(self, 0)
+
+#define array_back(self) array_get(self, (self)->size - 1)
+
+#define array_clear(self) ((self)->size = 0)
+
+#define array_reserve(self, new_capacity) \
+  array__reserve((VoidArray *)(self), array__elem_size(self), new_capacity)
+
+#define array_erase(self, index) \
+  array__erase((VoidArray *)(self), array__elem_size(self), index)
+
+// Release the array's storage and reset it to the empty state. `self` is
+// parenthesized before the cast, as in every other macro in this header.
+#define array_delete(self) array__delete((VoidArray *)(self))
+
+#define array_push(self, element)                            \
+  (array__grow((VoidArray *)(self), 1, array__elem_size(self)), \
+   (self)->contents[(self)->size++] = (element))
+
+#define array_grow_by(self, count) \
+  (array__grow((VoidArray *)(self), count, array__elem_size(self)), \
+   memset((self)->contents + (self)->size, 0, (count) * array__elem_size(self)), \
+   (self)->size += (count))
+
+#define array_push_all(self, other)                                       \
+  array_splice((self), (self)->size, 0, (other)->size, (other)->contents)
+
+#define array_splice(self, index, old_count, new_count, new_contents)          \
+  array__splice((VoidArray *)(self), array__elem_size(self), index, old_count, \
+                new_count, new_contents)
+
+#define array_insert(self, index, element) \
+  array__splice((VoidArray *)(self), array__elem_size(self), index, 0, 1, &element)
+
+#define array_pop(self) ((self)->contents[--(self)->size])
+
+#define array_assign(self, other) \
+  array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self))
+
+// Private
+
+typedef Array(void) VoidArray;
+
+#define array__elem_size(self) sizeof(*(self)->contents)
+
+static inline void array__delete(VoidArray *self) {
+  ts_free(self->contents);
+  self->contents = NULL;
+  self->size = 0;
+  self->capacity = 0;
+}
+
+// Remove the element at `index` by sliding every later element down one
+// slot. Asserts that `index` refers to an existing element.
+static inline void array__erase(VoidArray *self, size_t element_size,
+                                uint32_t index) {
+  assert(index < self->size);
+  char *base = (char *)self->contents;
+  char *slot = base + index * element_size;
+  uint32_t trailing = self->size - index - 1;
+  memmove(slot, slot + element_size, trailing * element_size);
+  self->size--;
+}
+
+// Ensure the array can hold at least `new_capacity` elements of
+// `element_size` bytes each. Does nothing when the current capacity is
+// already large enough, so the capacity never shrinks here.
+static inline void array__reserve(VoidArray *self, size_t element_size, uint32_t new_capacity) {
+  if (new_capacity > self->capacity) {
+    if (self->contents) {
+      // Existing buffer: grow it in place or move it via ts_realloc.
+      self->contents = ts_realloc(self->contents, new_capacity * element_size);
+    } else {
+      // No buffer yet: the first allocation goes through ts_calloc.
+      self->contents = ts_calloc(new_capacity, element_size);
+    }
+    self->capacity = new_capacity;
+  }
+}
+
+// Make `self` hold a copy of the elements of `other`. Capacity is grown if
+// needed (never shrunk) and `size` is set to `other->size`.
+static inline void array__assign(VoidArray *self, const VoidArray *other, size_t element_size) {
+  array__reserve(self, element_size, other->size);
+  self->size = other->size;
+  // When `other` is empty both `contents` pointers may be NULL, and passing
+  // NULL to memcpy is undefined even for a zero length, so skip the copy.
+  if (self->size > 0) {
+    memcpy(self->contents, other->contents, self->size * element_size);
+  }
+}
+
+// Make room for `count` more elements beyond the current size. When the
+// capacity is insufficient it is doubled, with a floor of 8 elements and of
+// the size actually required.
+static inline void array__grow(VoidArray *self, size_t count, size_t element_size) {
+  size_t needed = self->size + count;
+  if (needed <= self->capacity) return;
+
+  size_t target = self->capacity * 2;
+  if (target < 8) target = 8;
+  if (target < needed) target = needed;
+  array__reserve(self, element_size, target);
+}
+
+// Replace `old_count` elements starting at `index` with `new_count` elements
+// copied from `elements`. Elements that followed the replaced span are moved
+// so that they follow the inserted ones, and `size` changes by
+// `new_count - old_count`. Asserts that the replaced span lies within the
+// current size.
+static inline void array__splice(VoidArray *self, size_t element_size,
+                                 uint32_t index, uint32_t old_count,
+                                 uint32_t new_count, const void *elements) {
+  uint32_t new_size = self->size + new_count - old_count;
+  uint32_t old_end = index + old_count;
+  uint32_t new_end = index + new_count;
+  assert(old_end <= self->size);
+
+  // Grow first so that the moves below stay inside the buffer.
+  array__reserve(self, element_size, new_size);
+
+  char *contents = (char *)self->contents;
+  // Shift the tail (everything after the replaced span) to its new offset.
+  if (self->size > old_end)
+    memmove(contents + new_end * element_size, contents + old_end * element_size,
+            (self->size - old_end) * element_size);
+  // Copy the new elements into the gap; skipped entirely for pure removals.
+  if (new_count > 0)
+    memcpy((contents + index * element_size), elements,
+           new_count * element_size);
+  self->size += new_count - old_count;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_ARRAY_H_
--- a/shlr/tree-sitter/lib/src/atomic.h
+++ b/shlr/tree-sitter/lib/src/atomic.h
@ -0,0 +1,42 @@
+#ifndef TREE_SITTER_ATOMIC_H_
+#define TREE_SITTER_ATOMIC_H_
+
+#include <stdint.h>
+
+#ifdef _WIN32
+
+#include <windows.h>
+
+static inline size_t atomic_load(const volatile size_t *p) {
+  return *p;
+}
+
+static inline uint32_t atomic_inc(volatile uint32_t *p) {
+  return InterlockedIncrement((long volatile *)p);
+}
+
+static inline uint32_t atomic_dec(volatile uint32_t *p) {
+  return InterlockedDecrement((long volatile *)p);
+}
+
+#else
+
+// Read `*p` atomically. Uses __atomic_load_n with relaxed ordering when the
+// compiler defines __ATOMIC_RELAXED; otherwise falls back to
+// __sync_fetch_and_add with an addend of 0, which returns the current value
+// without changing it (that builtin needs a non-const pointer, hence the
+// cast).
+static inline size_t atomic_load(const volatile size_t *p) {
+#ifdef __ATOMIC_RELAXED
+  return __atomic_load_n(p, __ATOMIC_RELAXED);
+#else
+  return __sync_fetch_and_add((volatile size_t *)p, 0);
+#endif
+}
+
+static inline uint32_t atomic_inc(volatile uint32_t *p) {
+  return __sync_add_and_fetch(p, 1u);
+}
+
+static inline uint32_t atomic_dec(volatile uint32_t *p) {
+  return __sync_sub_and_fetch(p, 1u);
+}
+
+#endif
+
+#endif  // TREE_SITTER_ATOMIC_H_
--- a/shlr/tree-sitter/lib/src/bits.h
+++ b/shlr/tree-sitter/lib/src/bits.h
@ -0,0 +1,29 @@
+#ifndef TREE_SITTER_BITS_H_
+#define TREE_SITTER_BITS_H_
+
+#include <stdint.h>
+
+// Return a 32-bit mask with a single bit set, counted from the most
+// significant bit: id 0 yields 0x80000000 and id 31 yields 0x1.
+// NOTE(review): the shift amount is `31 - id`, so this presumably expects
+// id <= 31 — confirm at the call sites.
+static inline uint32_t bitmask_for_index(uint16_t id) {
+  return (1u << (31 - id));
+}
+
+#if defined _WIN32 && !defined __GNUC__
+
+#include <intrin.h>
+
+// Count the leading zero bits of `x`; returns 32 when `x` is zero.
+static inline uint32_t count_leading_zeros(uint32_t x) {
+  if (x == 0) return 32;
+  // _BitScanReverse is declared with an `unsigned long *` index parameter, so
+  // use exactly that type instead of passing a `uint32_t *`.
+  unsigned long result;
+  _BitScanReverse(&result, x);
+  return 31 - (uint32_t)result;
+}
+
+#else
+
+// Number of leading zero bits in `x`. Zero is handled explicitly and yields
+// 32 instead of being passed to __builtin_clz.
+static inline uint32_t count_leading_zeros(uint32_t x) {
+  return x == 0 ? 32 : (uint32_t)__builtin_clz(x);
+}
+
+#endif
+#endif  // TREE_SITTER_BITS_H_
--- a/shlr/tree-sitter/lib/src/clock.h
+++ b/shlr/tree-sitter/lib/src/clock.h
@ -0,0 +1,141 @@
+#ifndef TREE_SITTER_CLOCK_H_
+#define TREE_SITTER_CLOCK_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+typedef uint64_t TSDuration;
+
+#ifdef _WIN32
+
+// Windows:
+// * Represent a time as a performance counter value.
+// * Represent a duration as a number of performance counter ticks.
+
+#include <windows.h>
+typedef uint64_t TSClock;
+
+static inline TSDuration duration_from_micros(uint64_t micros) {
+  LARGE_INTEGER frequency;
+  QueryPerformanceFrequency(&frequency);
+  return micros * (uint64_t)frequency.QuadPart / 1000000;
+}
+
+static inline uint64_t duration_to_micros(TSDuration self) {
+  LARGE_INTEGER frequency;
+  QueryPerformanceFrequency(&frequency);
+  return self * 1000000 / (uint64_t)frequency.QuadPart;
+}
+
+static inline TSClock clock_null(void) {
+  return 0;
+}
+
+static inline TSClock clock_now(void) {
+  LARGE_INTEGER result;
+  QueryPerformanceCounter(&result);
+  return (uint64_t)result.QuadPart;
+}
+
+static inline TSClock clock_after(TSClock base, TSDuration duration) {
+  return base + duration;
+}
+
+static inline bool clock_is_null(TSClock self) {
+  return !self;
+}
+
+static inline bool clock_is_gt(TSClock self, TSClock other) {
+  return self > other;
+}
+
+#elif defined(CLOCK_MONOTONIC) && !defined(__APPLE__)
+
+// POSIX with monotonic clock support (Linux)
+// * Represent a time as a monotonic (seconds, nanoseconds) pair.
+// * Represent a duration as a number of microseconds.
+//
+// On these platforms, parse timeouts will correspond accurately to
+// real time, regardless of what other processes are running.
+
+#include <time.h>
+typedef struct timespec TSClock;
+
+static inline TSDuration duration_from_micros(uint64_t micros) {
+  return micros;
+}
+
+static inline uint64_t duration_to_micros(TSDuration self) {
+  return self;
+}
+
+static inline TSClock clock_now(void) {
+  TSClock result;
+  clock_gettime(CLOCK_MONOTONIC, &result);
+  return result;
+}
+
+static inline TSClock clock_null(void) {
+  return (TSClock) {0, 0};
+}
+
+// Return the time that lies `duration` microseconds after `base`.
+static inline TSClock clock_after(TSClock base, TSDuration duration) {
+  TSClock result = base;
+  result.tv_sec += duration / 1000000;
+  result.tv_nsec += (duration % 1000000) * 1000;
+  // Carry overflowing nanoseconds into the seconds field. clock_is_gt
+  // compares tv_sec before tv_nsec, so an unnormalized value (tv_nsec of one
+  // second or more) would make a deadline appear to pass up to a second
+  // early. At most one carry is needed: the increment above is below one
+  // second, and `base` is assumed to be normalized already.
+  if (result.tv_nsec >= 1000000000) {
+    result.tv_nsec -= 1000000000;
+    result.tv_sec += 1;
+  }
+  return result;
+}
+
+static inline bool clock_is_null(TSClock self) {
+  return !self.tv_sec;
+}
+
+static inline bool clock_is_gt(TSClock self, TSClock other) {
+  if (self.tv_sec > other.tv_sec) return true;
+  if (self.tv_sec < other.tv_sec) return false;
+  return self.tv_nsec > other.tv_nsec;
+}
+
+#else
+
+// macOS or POSIX without monotonic clock support
+// * Represent a time as a process clock value.
+// * Represent a duration as a number of process clock ticks.
+//
+// On these platforms, parse timeouts may be affected by other processes,
+// which is not ideal, but is better than using a non-monotonic time API
+// like `gettimeofday`.
+
+#include <time.h>
+typedef uint64_t TSClock;
+
+static inline TSDuration duration_from_micros(uint64_t micros) {
+  return micros * (uint64_t)CLOCKS_PER_SEC / 1000000;
+}
+
+static inline uint64_t duration_to_micros(TSDuration self) {
+  return self * 1000000 / (uint64_t)CLOCKS_PER_SEC;
+}
+
+static inline TSClock clock_null(void) {
+  return 0;
+}
+
+static inline TSClock clock_now(void) {
+  return (uint64_t)clock();
+}
+
+static inline TSClock clock_after(TSClock base, TSDuration duration) {
+  return base + duration;
+}
+
+static inline bool clock_is_null(TSClock self) {
+  return !self;
+}
+
+static inline bool clock_is_gt(TSClock self, TSClock other) {
+  return self > other;
+}
+
+#endif
+
+#endif  // TREE_SITTER_CLOCK_H_
--- a/shlr/tree-sitter/lib/src/error_costs.h
+++ b/shlr/tree-sitter/lib/src/error_costs.h
@ -0,0 +1,11 @@
+#ifndef TREE_SITTER_ERROR_COSTS_H_
+#define TREE_SITTER_ERROR_COSTS_H_
+
+#define ERROR_STATE 0
+#define ERROR_COST_PER_RECOVERY 500
+#define ERROR_COST_PER_MISSING_TREE 110
+#define ERROR_COST_PER_SKIPPED_TREE 100
+#define ERROR_COST_PER_SKIPPED_LINE 30
+#define ERROR_COST_PER_SKIPPED_CHAR 1
+
+#endif
--- a/shlr/tree-sitter/lib/src/get_changed_ranges.c
+++ b/shlr/tree-sitter/lib/src/get_changed_ranges.c
@ -0,0 +1,482 @@
+#include "./get_changed_ranges.h"
+#include "./subtree.h"
+#include "./language.h"
+#include "./error_costs.h"
+#include "./tree_cursor.h"
+#include <assert.h>
+
+// #define DEBUG_GET_CHANGED_RANGES
+
+// Append the range [start, end) to `self`, coalescing with the last stored
+// range where possible.
+//
+// If the array is non-empty and `start` does not lie past the end of the last
+// range, that range's end is overwritten with `end` and nothing is appended.
+// Otherwise a new range is pushed, but only when it is non-empty
+// (start.bytes < end.bytes).
+static void ts_range_array_add(TSRangeArray *self, Length start, Length end) {
+  if (self->size > 0) {
+    TSRange *last_range = array_back(self);
+    if (start.bytes <= last_range->end_byte) {
+      last_range->end_byte = end.bytes;
+      last_range->end_point = end.extent;
+      return;
+    }
+  }
+
+  if (start.bytes < end.bytes) {
+    TSRange range = { start.extent, end.extent, start.bytes, end.bytes };
+    array_push(self, range);
+  }
+}
+
+// Report whether any range at or after `start_index` overlaps the byte span
+// [start_byte, end_byte). Only the first range that ends after `start_byte`
+// decides the answer: it overlaps exactly when it starts before `end_byte`.
+bool ts_range_array_intersects(const TSRangeArray *self, unsigned start_index,
+                               uint32_t start_byte, uint32_t end_byte) {
+  for (unsigned i = start_index; i < self->size; i++) {
+    const TSRange *candidate = &self->contents[i];
+    if (candidate->end_byte <= start_byte) continue;
+    return candidate->start_byte < end_byte;
+  }
+  return false;
+}
+
+// Append to `differences` the spans that are covered by exactly one of the
+// two range lists.
+//
+// Both lists are walked in parallel as a sequence of boundaries (a range
+// start or a range end), always stepping to whichever boundary comes first.
+// The span between the previous boundary and the next one is recorded
+// whenever exactly one list is "inside" a range there.
+// NOTE(review): this presumably requires each list to be sorted and
+// non-overlapping — confirm against the callers.
+void ts_range_array_get_changed_ranges(
+  const TSRange *old_ranges, unsigned old_range_count,
+  const TSRange *new_ranges, unsigned new_range_count,
+  TSRangeArray *differences
+) {
+  unsigned new_index = 0;
+  unsigned old_index = 0;
+  Length current_position = length_zero();
+  bool in_old_range = false;
+  bool in_new_range = false;
+
+  while (old_index < old_range_count || new_index < new_range_count) {
+    // These may point one past the end of an exhausted list; they are only
+    // dereferenced below when the corresponding index is still valid.
+    const TSRange *old_range = &old_ranges[old_index];
+    const TSRange *new_range = &new_ranges[new_index];
+
+    // Next boundary in the old list: the end of the current range, the start
+    // of the next one, or LENGTH_MAX once the list is exhausted.
+    Length next_old_position;
+    if (in_old_range) {
+      next_old_position = (Length) {old_range->end_byte, old_range->end_point};
+    } else if (old_index < old_range_count) {
+      next_old_position = (Length) {old_range->start_byte, old_range->start_point};
+    } else {
+      next_old_position = LENGTH_MAX;
+    }
+
+    // Same for the new list.
+    Length next_new_position;
+    if (in_new_range) {
+      next_new_position = (Length) {new_range->end_byte, new_range->end_point};
+    } else if (new_index < new_range_count) {
+      next_new_position = (Length) {new_range->start_byte, new_range->start_point};
+    } else {
+      next_new_position = LENGTH_MAX;
+    }
+
+    if (next_old_position.bytes < next_new_position.bytes) {
+      // The old list's boundary comes first: cross it.
+      if (in_old_range != in_new_range) {
+        ts_range_array_add(differences, current_position, next_old_position);
+      }
+      if (in_old_range) old_index++;
+      current_position = next_old_position;
+      in_old_range = !in_old_range;
+    } else if (next_new_position.bytes < next_old_position.bytes) {
+      // The new list's boundary comes first: cross it.
+      if (in_old_range != in_new_range) {
+        ts_range_array_add(differences, current_position, next_new_position);
+      }
+      if (in_new_range) new_index++;
+      current_position = next_new_position;
+      in_new_range = !in_new_range;
+    } else {
+      // Both boundaries coincide: cross them together.
+      if (in_old_range != in_new_range) {
+        ts_range_array_add(differences, current_position, next_new_position);
+      }
+      if (in_old_range) old_index++;
+      if (in_new_range) new_index++;
+      in_old_range = !in_old_range;
+      in_new_range = !in_new_range;
+      current_position = next_new_position;
+    }
+  }
+}
+
+typedef struct {
+  TreeCursor cursor;
+  const TSLanguage *language;
+  unsigned visible_depth;
+  bool in_padding;
+} Iterator;
+
+static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree, const TSLanguage *language) {
+  array_clear(&cursor->stack);
+  array_push(&cursor->stack, ((TreeCursorEntry){
+    .subtree = tree,
+    .position = length_zero(),
+    .child_index = 0,
+    .structural_child_index = 0,
+  }));
+  return (Iterator) {
+    .cursor = *cursor,
+    .language = language,
+    .visible_depth = 1,
+    .in_padding = false,
+  };
+}
+
+static bool iterator_done(Iterator *self) {
+  return self->cursor.stack.size == 0;
+}
+
+// Position at which the iterator's current item begins: the entry's own
+// position while in padding, otherwise the position just after the subtree's
+// padding.
+static Length iterator_start_position(Iterator *self) {
+  const TreeCursorEntry *top = array_back(&self->cursor.stack);
+  Length start = top->position;
+  if (!self->in_padding) {
+    start = length_add(start, ts_subtree_padding(*top->subtree));
+  }
+  return start;
+}
+
+// Position at which the iterator's current item ends: the end of the
+// subtree's padding while in padding, otherwise the end of the subtree
+// itself (padding plus size).
+static Length iterator_end_position(Iterator *self) {
+  const TreeCursorEntry *top = array_back(&self->cursor.stack);
+  Length padding_end = length_add(top->position, ts_subtree_padding(*top->subtree));
+  return self->in_padding
+    ? padding_end
+    : length_add(padding_end, ts_subtree_size(*top->subtree));
+}
+
+// Report whether the subtree on top of the stack counts as visible: either it
+// is visible in its own right, or its parent's alias sequence assigns it a
+// non-zero alias at its structural child index. The root entry (no parent on
+// the stack) can only be visible in its own right.
+static bool iterator_tree_is_visible(const Iterator *self) {
+  TreeCursorEntry entry = *array_back(&self->cursor.stack);
+  if (ts_subtree_visible(*entry.subtree)) return true;
+  if (self->cursor.stack.size > 1) {
+    Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
+    const TSSymbol *alias_sequence = ts_language_alias_sequence(
+      self->language,
+      parent.ptr->production_id
+    );
+    return alias_sequence && alias_sequence[entry.structural_child_index] != 0;
+  }
+  return false;
+}
+
+// Find the innermost visible (or aliased) entry on the stack and report it.
+//
+// The stack is scanned from the top toward the root. For the first entry that
+// is visible or has a non-zero alias, `*tree` receives its subtree and
+// `*start_byte` its starting byte; `*alias_symbol` receives the alias looked
+// up for that entry, if its parent has an alias sequence. When the iterator
+// is in padding the scan starts one entry below the top, and if the top is
+// the only entry the outputs are left untouched. Outputs are also left
+// untouched when no entry qualifies, so callers must pre-initialize them.
+static void iterator_get_visible_state(const Iterator *self, Subtree *tree,
+                                       TSSymbol *alias_symbol, uint32_t *start_byte) {
+  uint32_t i = self->cursor.stack.size - 1;
+
+  if (self->in_padding) {
+    if (i == 0) return;
+    i--;
+  }
+
+  // `i` is unsigned, so `i + 1 > 0` is the loop guard that still visits
+  // index 0 and stops once `i` wraps around below it.
+  for (; i + 1 > 0; i--) {
+    TreeCursorEntry entry = self->cursor.stack.contents[i];
+
+    if (i > 0) {
+      const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
+      const TSSymbol *alias_sequence = ts_language_alias_sequence(
+        self->language,
+        parent->ptr->production_id
+      );
+      if (alias_sequence) {
+        *alias_symbol = alias_sequence[entry.structural_child_index];
+      }
+    }
+
+    if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
+      *tree = *entry.subtree;
+      *start_byte = entry.position.bytes;
+      break;
+    }
+  }
+}
+
+// Pop the top entry off the stack, moving the iterator to the parent.
+// No-op when the stack is already empty.
+static void iterator_ascend(Iterator *self) {
+  if (iterator_done(self)) return;
+  // Leaving a visible node that had actually been entered (not just its
+  // padding) reduces the visible depth.
+  if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--;
+  // The padding flag is cleared unless the entry being left is its parent's
+  // first child (child_index 0).
+  if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false;
+  self->cursor.stack.size--;
+}
+
+// Descend toward the first visible descendant that ends after
+// `goal_position` (a byte offset).
+//
+// At each level the first child whose end lies beyond `goal_position` is
+// pushed. If that child is visible the function returns true, having either
+// set `in_padding` (the goal lies before the child's content starts) or
+// incremented `visible_depth`. If the child is not visible, the search
+// repeats inside it. Returns false when the iterator is in padding or when no
+// such visible descendant exists; entries pushed for invisible children on
+// the way stay on the stack in that case.
+static bool iterator_descend(Iterator *self, uint32_t goal_position) {
+  if (self->in_padding) return false;
+
+  bool did_descend;
+  do {
+    did_descend = false;
+    TreeCursorEntry entry = *array_back(&self->cursor.stack);
+    Length position = entry.position;
+    uint32_t structural_child_index = 0;
+    for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
+      const Subtree *child = &entry.subtree->ptr->children[i];
+      // child_left: where the child's content starts (after its padding);
+      // child_right: where the child ends.
+      Length child_left = length_add(position, ts_subtree_padding(*child));
+      Length child_right = length_add(child_left, ts_subtree_size(*child));
+
+      if (child_right.bytes > goal_position) {
+        array_push(&self->cursor.stack, ((TreeCursorEntry){
+          .subtree = child,
+          .position = position,
+          .child_index = i,
+          .structural_child_index = structural_child_index,
+        }));
+
+        if (iterator_tree_is_visible(self)) {
+          if (child_left.bytes > goal_position) {
+            self->in_padding = true;
+          } else {
+            self->visible_depth++;
+          }
+          return true;
+        }
+
+        // Invisible child: keep it on the stack and search its children.
+        did_descend = true;
+        break;
+      }
+
+      position = child_right;
+      // Extra children do not count toward the structural child index.
+      if (!ts_subtree_extra(*child)) structural_child_index++;
+    }
+  } while (did_descend);
+
+  return false;
+}
+
+// Move the iterator to the next item in document order.
+//
+// When in padding, the next item is the content of the same subtree: the
+// padding flag is cleared and the node is entered (or, if it is invisible,
+// descended into). Otherwise entries are popped until one with a following
+// sibling is found; that sibling is pushed and entered, starting in its
+// padding when it has any. The stack ends up empty when there is nothing
+// left to visit.
+static void iterator_advance(Iterator *self) {
+  if (self->in_padding) {
+    self->in_padding = false;
+    if (iterator_tree_is_visible(self)) {
+      self->visible_depth++;
+    } else {
+      iterator_descend(self, 0);
+    }
+    return;
+  }
+
+  for (;;) {
+    if (iterator_tree_is_visible(self)) self->visible_depth--;
+    TreeCursorEntry entry = array_pop(&self->cursor.stack);
+    if (iterator_done(self)) return;
+
+    const Subtree *parent = array_back(&self->cursor.stack)->subtree;
+    uint32_t child_index = entry.child_index + 1;
+    if (ts_subtree_child_count(*parent) > child_index) {
+      // The next sibling starts where the popped entry (padding included)
+      // ended.
+      Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
+      uint32_t structural_child_index = entry.structural_child_index;
+      if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
+      const Subtree *next_child = &parent->ptr->children[child_index];
+
+      array_push(&self->cursor.stack, ((TreeCursorEntry){
+        .subtree = next_child,
+        .position = position,
+        .child_index = child_index,
+        .structural_child_index = structural_child_index,
+      }));
+
+      if (iterator_tree_is_visible(self)) {
+        if (ts_subtree_padding(*next_child).bytes > 0) {
+          self->in_padding = true;
+        } else {
+          self->visible_depth++;
+        }
+      } else {
+        iterator_descend(self, 0);
+      }
+      break;
+    }
+  }
+}
+
+typedef enum {
+  IteratorDiffers,
+  IteratorMayDiffer,
+  IteratorMatches,
+} IteratorComparison;
+
+// Classify how the current visible nodes of two iterators relate.
+//
+// - IteratorMatches: neither iterator has a visible node, or both nodes have
+//   the same alias and symbol and pass every equality check below.
+// - IteratorMayDiffer: same alias and symbol, but at least one check fails,
+//   so the nodes' contents need to be inspected.
+// - IteratorDiffers: only one iterator has a visible node, or the alias or
+//   symbol differs.
+static IteratorComparison iterator_compare(const Iterator *old_iter, const Iterator *new_iter) {
+  // Pre-initialized because iterator_get_visible_state may leave its outputs
+  // untouched.
+  Subtree old_tree = NULL_SUBTREE;
+  Subtree new_tree = NULL_SUBTREE;
+  uint32_t old_start = 0;
+  uint32_t new_start = 0;
+  TSSymbol old_alias_symbol = 0;
+  TSSymbol new_alias_symbol = 0;
+  iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
+  iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);
+
+  if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches;
+  if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers;
+
+  if (
+    old_alias_symbol == new_alias_symbol &&
+    ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree)
+  ) {
+    // A definite match requires: same start byte, no pending changes on the
+    // old tree, not an error node, same byte size, both parse states known,
+    // and agreement on whether the parse state is ERROR_STATE.
+    if (old_start == new_start &&
+        !ts_subtree_has_changes(old_tree) &&
+        ts_subtree_symbol(old_tree) != ts_builtin_sym_error &&
+        ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes &&
+        ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE &&
+        ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE &&
+        (ts_subtree_parse_state(old_tree) == ERROR_STATE) ==
+        (ts_subtree_parse_state(new_tree) == ERROR_STATE)) {
+      return IteratorMatches;
+    } else {
+      return IteratorMayDiffer;
+    }
+  }
+
+  return IteratorDiffers;
+}
+
+#ifdef DEBUG_GET_CHANGED_RANGES
+static inline void iterator_print_state(Iterator *self) {
+  TreeCursorEntry entry = *array_back(&self->cursor.stack);
+  TSPoint start = iterator_start_position(self).extent;
+  TSPoint end = iterator_end_position(self).extent;
+  const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree));
+  printf(
+    "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
+    name, self->in_padding ? "(p)" : "   ",
+    self->visible_depth,
+    start.row + 1, start.column,
+    end.row + 1, end.column
+  );
+}
+#endif
+
+// Compute the ranges in which two syntax trees differ.
+//
+// The two trees are walked in lockstep with one iterator each, comparing the
+// visible nodes at the current position and recording a range wherever they
+// differ. `included_range_differences` forces a closer look at nodes that
+// otherwise appear identical but overlap one of those ranges.
+//
+// `cursor1` and `cursor2` supply the stacks used by the iterators; their
+// stacks are cleared on entry and the cursors are written back on return.
+// The resulting array is stored in `*ranges` and its length is returned.
+// NOTE(review): the array comes from array_push, so the caller is presumably
+// responsible for freeing `*ranges` — confirm against the callers.
+unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *new_tree,
+                                       TreeCursor *cursor1, TreeCursor *cursor2,
+                                       const TSLanguage *language,
+                                       const TSRangeArray *included_range_differences,
+                                       TSRange **ranges) {
+  TSRangeArray results = array_new();
+
+  Iterator old_iter = iterator_new(cursor1, old_tree, language);
+  Iterator new_iter = iterator_new(cursor2, new_tree, language);
+
+  unsigned included_range_difference_index = 0;
+
+  // If the two roots start at different offsets, the gap between the two
+  // start positions is itself a change.
+  Length position = iterator_start_position(&old_iter);
+  Length next_position = iterator_start_position(&new_iter);
+  if (position.bytes < next_position.bytes) {
+    ts_range_array_add(&results, position, next_position);
+    position = next_position;
+  } else if (position.bytes > next_position.bytes) {
+    ts_range_array_add(&results, next_position, position);
+    next_position = position;
+  }
+
+  do {
+    #ifdef DEBUG_GET_CHANGED_RANGES
+    printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
+    iterator_print_state(&old_iter);
+    printf("\tvs\t");
+    iterator_print_state(&new_iter);
+    puts("");
+    #endif
+
+    // Compare the old and new subtrees.
+    IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);
+
+    // Even if the two subtrees appear to be identical, they could differ
+    // internally if they contain a range of text that was previously
+    // excluded from the parse, and is now included, or vice-versa.
+    if (comparison == IteratorMatches && ts_range_array_intersects(
+      included_range_differences,
+      included_range_difference_index,
+      position.bytes,
+      iterator_end_position(&old_iter).bytes
+    )) {
+      comparison = IteratorMayDiffer;
+    }
+
+    bool is_changed = false;
+    switch (comparison) {
+      // If the subtrees are definitely identical, move to the end
+      // of both subtrees.
+      case IteratorMatches:
+        next_position = iterator_end_position(&old_iter);
+        break;
+
+      // If the subtrees might differ internally, descend into both
+      // subtrees, finding the first child that spans the current position.
+      case IteratorMayDiffer:
+        if (iterator_descend(&old_iter, position.bytes)) {
+          if (!iterator_descend(&new_iter, position.bytes)) {
+            is_changed = true;
+            next_position = iterator_end_position(&old_iter);
+          }
+        } else if (iterator_descend(&new_iter, position.bytes)) {
+          is_changed = true;
+          next_position = iterator_end_position(&new_iter);
+        } else {
+          next_position = length_min(
+            iterator_end_position(&old_iter),
+            iterator_end_position(&new_iter)
+          );
+        }
+        break;
+
+      // If the subtrees are different, record a change and then move
+      // to the end of both subtrees.
+      case IteratorDiffers:
+        is_changed = true;
+        next_position = length_min(
+          iterator_end_position(&old_iter),
+          iterator_end_position(&new_iter)
+        );
+        break;
+    }
+
+    // Ensure that both iterators are caught up to the current position.
+    while (
+      !iterator_done(&old_iter) &&
+      iterator_end_position(&old_iter).bytes <= next_position.bytes
+    ) iterator_advance(&old_iter);
+    while (
+      !iterator_done(&new_iter) &&
+      iterator_end_position(&new_iter).bytes <= next_position.bytes
+    ) iterator_advance(&new_iter);
+
+    // Ensure that both iterators are at the same depth in the tree.
+    while (old_iter.visible_depth > new_iter.visible_depth) {
+      iterator_ascend(&old_iter);
+    }
+    while (new_iter.visible_depth > old_iter.visible_depth) {
+      iterator_ascend(&new_iter);
+    }
+
+    if (is_changed) {
+      #ifdef DEBUG_GET_CHANGED_RANGES
+      printf(
+        "  change: [[%u, %u] - [%u, %u]]\n",
+        position.extent.row + 1, position.extent.column,
+        next_position.extent.row + 1, next_position.extent.column
+      );
+      #endif
+
+      ts_range_array_add(&results, position, next_position);
+    }
+
+    position = next_position;
+
+    // Keep track of the current position in the included range differences
+    // array in order to avoid scanning the entire array on each iteration.
+    while (included_range_difference_index < included_range_differences->size) {
+      const TSRange *range = &included_range_differences->contents[
+        included_range_difference_index
+      ];
+      if (range->end_byte <= position.bytes) {
+        included_range_difference_index++;
+      } else {
+        break;
+      }
+    }
+  } while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
+
+  // If one tree is longer than the other, the extra tail is a change too.
+  Length old_size = ts_subtree_total_size(*old_tree);
+  Length new_size = ts_subtree_total_size(*new_tree);
+  if (old_size.bytes < new_size.bytes) {
+    ts_range_array_add(&results, old_size, new_size);
+  } else if (new_size.bytes < old_size.bytes) {
+    ts_range_array_add(&results, new_size, old_size);
+  }
+
+  // Hand the (possibly reallocated) stacks back to the caller's cursors.
+  *cursor1 = old_iter.cursor;
+  *cursor2 = new_iter.cursor;
+  *ranges = results.contents;
+  return results.size;
+}
--- a/shlr/tree-sitter/lib/src/get_changed_ranges.h
+++ b/shlr/tree-sitter/lib/src/get_changed_ranges.h
@ -0,0 +1,36 @@
+#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_
+#define TREE_SITTER_GET_CHANGED_RANGES_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./tree_cursor.h"
+#include "./subtree.h"
+
+// Array of TSRange values; callers in get_changed_ranges.c read its
+// `contents` and `size` members.
+typedef Array(TSRange) TSRangeArray;
+
+// Compares an old and a new list of ranges and writes the result into
+// `differences`.
+// NOTE(review): presumably the output holds the byte ranges covered by only
+// one of the two lists - confirm against the definition.
+void ts_range_array_get_changed_ranges(
+  const TSRange *old_ranges, unsigned old_range_count,
+  const TSRange *new_ranges, unsigned new_range_count,
+  TSRangeArray *differences
+);
+
+// NOTE(review): presumably reports whether any range in `self`, starting the
+// search at `start_index`, overlaps [start_byte, end_byte) - confirm against
+// the definition.
+bool ts_range_array_intersects(
+  const TSRangeArray *self, unsigned start_index,
+  uint32_t start_byte, uint32_t end_byte
+);
+
+// Computes the ranges that differ between `old_tree` and `new_tree`.
+// On return `*ranges` points at the result array and the return value is the
+// number of entries in it; `cursor1` and `cursor2` receive the cursors used
+// to walk the old and new tree respectively.
+unsigned ts_subtree_get_changed_ranges(
+  const Subtree *old_tree, const Subtree *new_tree,
+  TreeCursor *cursor1, TreeCursor *cursor2,
+  const TSLanguage *language,
+  const TSRangeArray *included_range_differences,
+  TSRange **ranges
+);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_GET_CHANGED_RANGES_H_
--- a/shlr/tree-sitter/lib/src/language.c
+++ b/shlr/tree-sitter/lib/src/language.c
@ -0,0 +1,147 @@
+#include "./language.h"
+#include "./subtree.h"
+#include "./error_costs.h"
+#include <string.h>
+
+// Number of symbols in the language: `symbol_count` plus `alias_count`.
+uint32_t ts_language_symbol_count(const TSLanguage *self) {
+  uint32_t total = self->symbol_count;
+  total += self->alias_count;
+  return total;
+}
+
+// ABI version number recorded in the language structure.
+uint32_t ts_language_version(const TSLanguage *self) {
+  const uint32_t version = self->version;
+  return version;
+}
+
+// Number of fields in the language. Languages older than
+// TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS always report zero.
+uint32_t ts_language_field_count(const TSLanguage *self) {
+  bool has_fields = self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS;
+  return has_fields ? self->field_count : 0;
+}
+
+// Look up the parse-table entry for `symbol` in `state` and store its action
+// list, action count and reusability flag in `result`.
+void ts_language_table_entry(
+  const TSLanguage *self,
+  TSStateId state,
+  TSSymbol symbol,
+  TableEntry *result
+) {
+  // The built-in error symbols are not looked up in the table; they always
+  // produce an empty, non-reusable entry.
+  bool is_error_symbol =
+    symbol == ts_builtin_sym_error ||
+    symbol == ts_builtin_sym_error_repeat;
+  if (is_error_symbol) {
+    result->actions = NULL;
+    result->action_count = 0;
+    result->is_reusable = false;
+    return;
+  }
+
+  assert(symbol < self->token_count);
+
+  // The entry found through the lookup is a header carrying the count and
+  // the reusable flag; the actions themselves start right after it.
+  uint32_t header_index = ts_language_lookup(self, state, symbol);
+  const TSParseActionEntry *header = &self->parse_actions[header_index];
+  result->actions = (const TSParseAction *)(header + 1);
+  result->action_count = header->count;
+  result->is_reusable = header->reusable;
+}
+
+TSSymbolMetadata ts_language_symbol_metadata(
+  const TSLanguage *self,
+  TSSymbol symbol
+) {
+  if (symbol == ts_builtin_sym_error)  {
+    return (TSSymbolMetadata){.visible = true, .named = true};
+  } else if (symbol == ts_builtin_sym_error_repeat) {
+    return (TSSymbolMetadata){.visible = false, .named = false};
+  } else {
+    return self->symbol_metadata[symbol];
+  }
+}
+
+TSSymbol ts_language_public_symbol(
+  const TSLanguage *self,
+  TSSymbol symbol
+) {
+  if (symbol == ts_builtin_sym_error) return symbol;
+  if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) {
+    return self->public_symbol_map[symbol];
+  } else {
+    return symbol;
+  }
+}
+
+const char *ts_language_symbol_name(
+  const TSLanguage *self,
+  TSSymbol symbol
+) {
+  if (symbol == ts_builtin_sym_error) {
+    return "ERROR";
+  } else if (symbol == ts_builtin_sym_error_repeat) {
+    return "_ERROR";
+  } else {
+    return self->symbol_names[symbol];
+  }
+}
+
+// Find the symbol whose name is exactly the first `length` bytes of `string`
+// and whose named-ness equals `is_named`.
+//
+// The name "ERROR" resolves to the built-in error symbol regardless of
+// `is_named`. Only visible symbols are considered. For languages that have a
+// public symbol map the public symbol is returned. Returns 0 when nothing
+// matches.
+TSSymbol ts_language_symbol_for_name(
+  const TSLanguage *self,
+  const char *string,
+  uint32_t length,
+  bool is_named
+) {
+  // Require the complete name. A strncmp() bounded only by `length` would
+  // also accept the empty string and every proper prefix of "ERROR" (such as
+  // "E" or "ERR") as the error symbol.
+  static const char error_name[] = "ERROR";
+  if (
+    length == sizeof(error_name) - 1 &&
+    !strncmp(string, error_name, length)
+  ) return ts_builtin_sym_error;
+
+  uint32_t count = ts_language_symbol_count(self);
+  for (TSSymbol i = 0; i < count; i++) {
+    TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i);
+    if (!metadata.visible || metadata.named != is_named) continue;
+    const char *symbol_name = self->symbol_names[i];
+    // The first `length` bytes must match and the symbol's name must end
+    // there, so that a longer symbol name is not matched by its prefix.
+    if (!strncmp(symbol_name, string, length) && !symbol_name[length]) {
+      if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) {
+        return self->public_symbol_map[i];
+      } else {
+        return i;
+      }
+    }
+  }
+  return 0;
+}
+
+// Classify `symbol` from its metadata: named symbols are regular, unnamed
+// but visible symbols are anonymous, and everything else is auxiliary.
+TSSymbolType ts_language_symbol_type(
+  const TSLanguage *self,
+  TSSymbol symbol
+) {
+  TSSymbolMetadata info = ts_language_symbol_metadata(self, symbol);
+  if (info.named) return TSSymbolTypeRegular;
+  if (info.visible) return TSSymbolTypeAnonymous;
+  return TSSymbolTypeAuxiliary;
+}
+
+// Name of the field with the given id.
+//
+// Returns NULL when the language has no fields or when `id` is greater than
+// the number of fields; without that bound an arbitrary id would read past
+// the end of the `field_names` table. Field ids are 1-based (the name lookup
+// in this file scans entries 1..count), so `id == count` is still valid.
+const char *ts_language_field_name_for_id(
+  const TSLanguage *self,
+  TSFieldId id
+) {
+  uint32_t count = ts_language_field_count(self);
+  if (count && id <= count) {
+    return self->field_names[id];
+  } else {
+    return NULL;
+  }
+}
+
+// Find the id of the field whose name is exactly the first `name_length`
+// bytes of `name`. Returns 0 when there is no such field.
+//
+// The scan stops as soon as `name` compares lower than the current entry.
+// NOTE(review): this early exit assumes `field_names` is sorted in strncmp()
+// order - confirm against the parser generator.
+TSFieldId ts_language_field_id_for_name(
+  const TSLanguage *self,
+  const char *name,
+  uint32_t name_length
+) {
+  uint32_t count = ts_language_field_count(self);
+  for (TSSymbol i = 1; i < count + 1; i++) {
+    // strncmp() only guarantees the sign of its result, never the exact
+    // value, so test `< 0` instead of comparing against -1.
+    int comparison = strncmp(name, self->field_names[i], name_length);
+    if (comparison == 0) {
+      // Equal prefix: it is a match only if the field name also ends here.
+      if (self->field_names[i][name_length] == 0) return i;
+    } else if (comparison < 0) {
+      return 0;
+    }
+  }
+  return 0;
+}
--- a/shlr/tree-sitter/lib/src/language.h
+++ b/shlr/tree-sitter/lib/src/language.h
@ -0,0 +1,141 @@
+#ifndef TREE_SITTER_LANGUAGE_H_
+#define TREE_SITTER_LANGUAGE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./subtree.h"
+#include "tree_sitter/parser.h"
+
+// Second built-in error symbol, numbered directly below ts_builtin_sym_error.
+#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
+// Minimum language versions at which the corresponding TSLanguage members
+// are consulted: field tables, the public symbol map, and the small parse
+// table respectively.
+#define TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS 10
+#define TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING 11
+#define TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES 11
+
+// Result of a parse-table lookup, filled in by ts_language_table_entry().
+typedef struct {
+  const TSParseAction *actions;  // first action, or NULL for error symbols
+  uint32_t action_count;         // number of entries in `actions`
+  bool is_reusable;              // copied from the entry's `reusable` flag
+} TableEntry;
+
+// Fill a TableEntry with the parse actions for a (state, symbol) pair.
+void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *);
+
+// Visible / named metadata for a symbol, including the built-in error symbols.
+TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
+
+// Map an internal symbol to its public symbol.
+TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol);
+
+// True when `symbol` lies in the range 1..external_token_count.
+static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
+  if (symbol == 0) return false;
+  return symbol < self->external_token_count + 1;
+}
+
+// Return the parse actions for `symbol` in `state` and store how many there
+// are in `*count`.
+static inline const TSParseAction *ts_language_actions(const TSLanguage *self,
+                                                       TSStateId state,
+                                                       TSSymbol symbol,
+                                                       uint32_t *count) {
+  TableEntry table_entry;
+  ts_language_table_entry(self, state, symbol, &table_entry);
+  const TSParseAction *actions = table_entry.actions;
+  *count = table_entry.action_count;
+  return actions;
+}
+
+// True when the parse table holds at least one action for `symbol` in
+// `state`.
+static inline bool ts_language_has_actions(const TSLanguage *self,
+                                           TSStateId state,
+                                           TSSymbol symbol) {
+  TableEntry table_entry;
+  ts_language_table_entry(self, state, symbol, &table_entry);
+  return table_entry.action_count != 0;
+}
+
+// True when the first parse action for `symbol` in `state` is a reduce.
+static inline bool ts_language_has_reduce_action(const TSLanguage *self,
+                                                 TSStateId state,
+                                                 TSSymbol symbol) {
+  TableEntry table_entry;
+  ts_language_table_entry(self, state, symbol, &table_entry);
+  if (table_entry.action_count == 0) return false;
+  return table_entry.actions[0].type == TSParseActionTypeReduce;
+}
+
+// Fetch the parse-table value for (state, symbol).
+//
+// States below `large_state_count`, and every state of a language older than
+// TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES, are read from the dense
+// `parse_table`. Other states use `small_parse_table`, which stores, per
+// state, a section count followed by sections of the form
+// (value, symbol count, symbols...). A symbol found in no section yields 0.
+static inline uint16_t ts_language_lookup(
+  const TSLanguage *self,
+  TSStateId state,
+  TSSymbol symbol
+) {
+  bool uses_small_table =
+    self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES &&
+    state >= self->large_state_count;
+
+  if (!uses_small_table) {
+    return self->parse_table[state * self->symbol_count + symbol];
+  }
+
+  uint32_t index = self->small_parse_table_map[state - self->large_state_count];
+  const uint16_t *cursor = &self->small_parse_table[index];
+  uint16_t section_count = *cursor++;
+  for (unsigned section = 0; section < section_count; section++) {
+    uint16_t section_value = *cursor++;
+    uint16_t symbol_count = *cursor++;
+    for (unsigned entry = 0; entry < symbol_count; entry++) {
+      if (*cursor++ == symbol) return section_value;
+    }
+  }
+  return 0;
+}
+
+// State reached from `state` on `symbol`, or 0 when there is none.
+static inline TSStateId ts_language_next_state(const TSLanguage *self,
+                                               TSStateId state,
+                                               TSSymbol symbol) {
+  if (symbol == ts_builtin_sym_error) return 0;
+  if (symbol == ts_builtin_sym_error_repeat) return 0;
+
+  // For symbols at or beyond `token_count` the table value is returned
+  // directly as the next state.
+  if (symbol >= self->token_count) {
+    return ts_language_lookup(self, state, symbol);
+  }
+
+  // For tokens, only the last action matters, and only when it is a shift
+  // or a recover.
+  uint32_t action_count;
+  const TSParseAction *actions = ts_language_actions(self, state, symbol, &action_count);
+  if (action_count == 0) return 0;
+
+  TSParseAction last_action = actions[action_count - 1];
+  if (
+    last_action.type != TSParseActionTypeShift &&
+    last_action.type != TSParseActionTypeRecover
+  ) {
+    return 0;
+  }
+  return last_action.params.state;
+}
+
+// Row of the external scanner's `states` table for `external_scanner_state`
+// (each row holds `external_token_count` flags). State 0 has no row and
+// yields NULL.
+static inline const bool *
+ts_language_enabled_external_tokens(const TSLanguage *self,
+                                    unsigned external_scanner_state) {
+  if (external_scanner_state != 0) {
+    return self->external_scanner.states + self->external_token_count * external_scanner_state;
+  }
+  return NULL;
+}
+
+// Alias sequence for `production_id`: a row of `max_alias_sequence_length`
+// symbols inside `alias_sequences`. Production 0 has none and yields NULL.
+static inline const TSSymbol *
+ts_language_alias_sequence(const TSLanguage *self, uint32_t production_id) {
+  if (production_id > 0) {
+    return self->alias_sequences + production_id * self->max_alias_sequence_length;
+  }
+  return NULL;
+}
+
+// Store in `*start` / `*end` the half-open range of field-map entries that
+// belong to `production_id`. Both are set to NULL when the language predates
+// field support or defines no fields.
+static inline void ts_language_field_map(
+  const TSLanguage *self,
+  uint32_t production_id,
+  const TSFieldMapEntry **start,
+  const TSFieldMapEntry **end
+) {
+  bool has_fields =
+    self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS &&
+    self->field_count != 0;
+
+  if (has_fields) {
+    TSFieldMapSlice slice = self->field_map_slices[production_id];
+    const TSFieldMapEntry *first = &self->field_map_entries[slice.index];
+    *start = first;
+    *end = first + slice.length;
+  } else {
+    *start = NULL;
+    *end = NULL;
+  }
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_LANGUAGE_H_
--- a/shlr/tree-sitter/lib/src/length.h
+++ b/shlr/tree-sitter/lib/src/length.h
@ -0,0 +1,44 @@
+#ifndef TREE_SITTER_LENGTH_H_
+#define TREE_SITTER_LENGTH_H_
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include "./point.h"
+#include "tree_sitter/api.h"
+
+// A position or size in source text, tracked both as a byte count and as a
+// row/column point.
+typedef struct {
+  uint32_t bytes;   // byte count / byte offset
+  TSPoint extent;   // the same quantity expressed as a (row, column) point
+} Length;
+
+// Sentinel for "no length": zero bytes combined with a non-zero column, which
+// is exactly the combination length_is_undefined() tests for.
+static const Length LENGTH_UNDEFINED = {0, {0, 1}};
+// Largest representable length: every component is UINT32_MAX.
+static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}};
+
+static inline bool length_is_undefined(Length length) {
+  return length.bytes == 0 && length.extent.column != 0;
+}
+
+// Whichever argument has the smaller byte count; `len2` wins a tie.
+static inline Length length_min(Length len1, Length len2) {
+  if (len1.bytes < len2.bytes) {
+    return len1;
+  }
+  return len2;
+}
+
+// Sum of two lengths: bytes are added numerically, extents with point_add().
+static inline Length length_add(Length len1, Length len2) {
+  Length sum = {
+    .bytes = len1.bytes + len2.bytes,
+    .extent = point_add(len1.extent, len2.extent),
+  };
+  return sum;
+}
+
+// Difference of two lengths: bytes are subtracted numerically, extents with
+// point_sub().
+static inline Length length_sub(Length len1, Length len2) {
+  Length difference = {
+    .bytes = len1.bytes - len2.bytes,
+    .extent = point_sub(len1.extent, len2.extent),
+  };
+  return difference;
+}
+
+// The zero length: no bytes, row 0, column 0.
+static inline Length length_zero(void) {
+  const Length zero = {.bytes = 0, .extent = {0, 0}};
+  return zero;
+}
+
+#endif
--- a/shlr/tree-sitter/lib/src/lexer.c
+++ b/shlr/tree-sitter/lib/src/lexer.c
@ -0,0 +1,391 @@
+#include <stdio.h>
+#include "./lexer.h"
+#include "./subtree.h"
+#include "./length.h"
+#include "./unicode.h"
+
+// Emit a lexing log line through the lexer's logger, if one is installed.
+// `message` must be a string literal: it is concatenated with a suffix that
+// prints `character` as a quoted char when it is printable ASCII (32..126)
+// and as a decimal number otherwise. The text is formatted into
+// `self->debug_buffer`, so a `self` lexer must be in scope.
+// The macro expands to a bare `if` statement, so use it inside braces when
+// it is the body of another `if`/`else`.
+#define LOG(message, character)              \
+  if (self->logger.log) {                    \
+    snprintf(                                \
+      self->debug_buffer,                    \
+      TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \
+      32 <= character && character < 127 ?   \
+        message " character:'%c'" :          \
+        message " character:%d",             \
+      character                              \
+    );                                       \
+    self->logger.log(                        \
+      self->logger.payload,                  \
+      TSLogTypeLex,                          \
+      self->debug_buffer                     \
+    );                                       \
+  }
+
+// Code point that ts_lexer_start() skips when it is the lookahead at byte
+// offset 0.
+static const int32_t BYTE_ORDER_MARK = 0xFEFF;
+
+// Range used when the caller supplies no included ranges: it starts at
+// byte 0 / point (0, 0) and ends at the maximum representable position.
+static const TSRange DEFAULT_RANGE = {
+  .start_point = {
+    .row = 0,
+    .column = 0,
+  },
+  .end_point = {
+    .row = UINT32_MAX,
+    .column = UINT32_MAX,
+  },
+  .start_byte = 0,
+  .end_byte = UINT32_MAX
+};
+
+// Report whether the lexer has reached EOF. EOF is represented by
+// `current_included_range_index` being equal to `included_range_count`,
+// i.e. no included range is left to read from.
+static bool ts_lexer__eof(const TSLexer *_self) {
+  const Lexer *lexer = (const Lexer *)_self;
+  return lexer->included_range_count == lexer->current_included_range_index;
+}
+
+// Forget the cached chunk of source text. Used whenever the lexer's position
+// changes in a way that makes the cached chunk unusable.
+static void ts_lexer__clear_chunk(Lexer *self) {
+  self->chunk_start = 0;
+  self->chunk_size = 0;
+  self->chunk = NULL;
+}
+
+// Ask the input callback for the chunk of source text that starts at the
+// current position. An empty chunk means there is no more input: the lexer
+// is then put into the EOF state and the chunk pointer is cleared.
+static void ts_lexer__get_chunk(Lexer *self) {
+  Length position = self->current_position;
+  self->chunk_start = position.bytes;
+  self->chunk = self->input.read(
+    self->input.payload,
+    position.bytes,
+    position.extent,
+    &self->chunk_size
+  );
+
+  if (self->chunk_size == 0) {
+    self->chunk = NULL;
+    self->current_included_range_index = self->included_range_count;
+  }
+}
+
+// Decode the next unicode character in the current chunk of source code.
+// This assumes that the lexer has already retrieved a chunk of source
+// code that spans the current position.
+// On return `data.lookahead` holds the decoded code point and
+// `lookahead_size` the number of bytes it occupies.
+static void ts_lexer__get_lookahead(Lexer *self) {
+  uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
+  const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
+  uint32_t size = self->chunk_size - position_in_chunk;
+
+  // Nothing left in the chunk: report a NUL lookahead of size 1.
+  if (size == 0) {
+    self->lookahead_size = 1;
+    self->data.lookahead = '\0';
+    return;
+  }
+
+  // Pick the decoder that matches the input's declared encoding.
+  UnicodeDecodeFunction decode = self->input.encoding == TSInputEncodingUTF8
+    ? ts_decode_utf8
+    : ts_decode_utf16;
+
+  self->lookahead_size = decode(chunk, size, &self->data.lookahead);
+
+  // If this chunk ended in the middle of a multi-byte character,
+  // try again with a fresh chunk.
+  if (self->data.lookahead == TS_DECODE_ERROR && size < 4) {
+    ts_lexer__get_chunk(self);
+    chunk = (const uint8_t *)self->chunk;
+    size = self->chunk_size;
+    self->lookahead_size = decode(chunk, size, &self->data.lookahead);
+  }
+
+  // A character that still cannot be decoded is treated as one byte wide.
+  if (self->data.lookahead == TS_DECODE_ERROR) {
+    self->lookahead_size = 1;
+  }
+}
+
+// Advance to the next character in the source code, retrieving a new
+// chunk of source code if needed.
+// When `skip` is true the token start is moved to the new position as well.
+// Does nothing when no chunk is loaded.
+static void ts_lexer__advance(TSLexer *_self, bool skip) {
+  Lexer *self = (Lexer *)_self;
+  if (!self->chunk) return;
+
+  if (skip) {
+    LOG("skip", self->data.lookahead);
+  } else {
+    LOG("consume", self->data.lookahead);
+  }
+
+  // Step over the current lookahead character, updating the byte offset and
+  // the row/column. Columns are counted in bytes.
+  if (self->lookahead_size) {
+    self->current_position.bytes += self->lookahead_size;
+    if (self->data.lookahead == '\n') {
+      self->current_position.extent.row++;
+      self->current_position.extent.column = 0;
+    } else {
+      self->current_position.extent.column += self->lookahead_size;
+    }
+  }
+
+  // If the end of the current included range was reached, jump to the start
+  // of the next one; `current_range` ends up NULL when no range is left.
+  const TSRange *current_range = NULL;
+  if (self->current_included_range_index < self->included_range_count) {
+    current_range = &self->included_ranges[self->current_included_range_index];
+    if (self->current_position.bytes == current_range->end_byte) {
+      self->current_included_range_index++;
+      if (self->current_included_range_index < self->included_range_count) {
+        current_range++;
+        self->current_position = (Length) {
+          current_range->start_byte,
+          current_range->start_point,
+        };
+      } else {
+        current_range = NULL;
+      }
+    }
+  }
+
+  if (skip) self->token_start_position = self->current_position;
+
+  if (current_range) {
+    // Still inside a range: load a new chunk if the position has moved past
+    // the cached one, then decode the next character.
+    if (self->current_position.bytes >= self->chunk_start + self->chunk_size) {
+      ts_lexer__get_chunk(self);
+    }
+    ts_lexer__get_lookahead(self);
+  } else {
+    // Past the last range: drop the chunk and present a NUL lookahead.
+    ts_lexer__clear_chunk(self);
+    self->data.lookahead = '\0';
+    self->lookahead_size = 1;
+  }
+}
+
+// Record the end of the token being lexed. May be called repeatedly; a later
+// call (for a longer match) replaces the earlier end position.
+static void ts_lexer__mark_end(TSLexer *_self) {
+  Lexer *self = (Lexer *)_self;
+
+  // By default the token ends at the current position.
+  self->token_end_position = self->current_position;
+  if (ts_lexer__eof(&self->data)) return;
+
+  // Exception: when the lexer sits exactly at the start of an included range
+  // other than the first, the token is considered to end at the *end* of the
+  // previous included range instead.
+  TSRange *range = &self->included_ranges[self->current_included_range_index];
+  bool at_start_of_later_range =
+    self->current_included_range_index > 0 &&
+    self->current_position.bytes == range->start_byte;
+  if (at_start_of_later_range) {
+    TSRange *previous = range - 1;
+    self->token_end_position = (Length) {
+      previous->end_byte,
+      previous->end_point,
+    };
+  }
+}
+
+// Count the characters between the start of the current line and the current
+// position. The lexer is rewound to the line start (using the byte-based
+// column) and then advanced one character at a time until it is back at the
+// original byte offset; the number of steps is the result.
+static uint32_t ts_lexer__get_column(TSLexer *_self) {
+  Lexer *self = (Lexer *)_self;
+  uint32_t goal_byte = self->current_position.bytes;
+
+  // Rewind to the first byte of the current line.
+  self->current_position.bytes -= self->current_position.extent.column;
+  self->current_position.extent.column = 0;
+
+  // The line may begin before the cached chunk; fetch a chunk from there.
+  if (self->current_position.bytes < self->chunk_start) {
+    ts_lexer__get_chunk(self);
+  }
+
+  uint32_t result = 0;
+  while (self->current_position.bytes < goal_byte) {
+    ts_lexer__advance(&self->data, false);
+    result++;
+  }
+
+  return result;
+}
+
+// Is the lexer positioned exactly at the start of its current included
+// range? This is exposed as an API because some languages' external
+// scanners need to perform custom actions at these boundaries.
+static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) {
+  const Lexer *self = (const Lexer *)_self;
+
+  // Once every range has been consumed there is no current range.
+  if (self->current_included_range_index >= self->included_range_count) {
+    return false;
+  }
+
+  TSRange *range = &self->included_ranges[self->current_included_range_index];
+  return range->start_byte == self->current_position.bytes;
+}
+
+// Initialize a lexer: install the TSLexer callbacks, reset the position and
+// chunk state, clear the logger, and install the default included range.
+void ts_lexer_init(Lexer *self) {
+  *self = (Lexer) {
+    .data = {
+      // The lexer's methods are stored as struct fields so that generated
+      // parsers can call them without needing to be linked against this
+      // library.
+      .advance = ts_lexer__advance,
+      .mark_end = ts_lexer__mark_end,
+      .get_column = ts_lexer__get_column,
+      .is_at_included_range_start = ts_lexer__is_at_included_range_start,
+      .eof = ts_lexer__eof,
+      .lookahead = 0,
+      .result_symbol = 0,
+    },
+    .chunk = NULL,
+    .chunk_size = 0,
+    .chunk_start = 0,
+    .current_position = {0, {0, 0}},
+    .logger = {
+      .payload = NULL,
+      .log = NULL
+    },
+    .included_ranges = NULL,
+    .included_range_count = 0,
+    .current_included_range_index = 0,
+  };
+  // Passing no ranges makes the lexer fall back to DEFAULT_RANGE, so it
+  // always has at least one included range after initialization.
+  ts_lexer_set_included_ranges(self, NULL, 0);
+}
+
+// Release the lexer's included-ranges array. The Lexer struct itself is not
+// freed here.
+void ts_lexer_delete(Lexer *self) {
+  ts_free(self->included_ranges);
+}
+
+// Move the lexer to `position`, snapping forward to the start of the next
+// included range when `position` falls in a gap between ranges, or to the
+// EOF state when it lies beyond the last range.
+static void ts_lexer_goto(Lexer *self, Length position) {
+  self->current_position = position;
+  bool found_included_range = false;
+
+  // Move to the first valid position at or after the given position.
+  for (unsigned i = 0; i < self->included_range_count; i++) {
+    TSRange *included_range = &self->included_ranges[i];
+    if (included_range->end_byte > position.bytes) {
+      if (included_range->start_byte > position.bytes) {
+        self->current_position = (Length) {
+          .bytes = included_range->start_byte,
+          .extent = included_range->start_point,
+        };
+      }
+
+      self->current_included_range_index = i;
+      found_included_range = true;
+      break;
+    }
+  }
+
+  if (found_included_range) {
+    // If the current position is outside of the current chunk of text,
+    // then clear out the current chunk of text.
+    if (self->chunk && (
+      position.bytes < self->chunk_start ||
+      position.bytes >= self->chunk_start + self->chunk_size
+    )) {
+      ts_lexer__clear_chunk(self);
+    }
+
+    // A zero lookahead size tells ts_lexer_start() that the lookahead
+    // character still has to be decoded.
+    self->lookahead_size = 0;
+    self->data.lookahead = '\0';
+  }
+
+  // If the given position is beyond any of included ranges, move to the EOF
+  // state - past the end of the included ranges.
+  else {
+    self->current_included_range_index = self->included_range_count;
+    // Indexes the last range, so at least one included range must exist.
+    TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
+    self->current_position = (Length) {
+      .bytes = last_included_range->end_byte,
+      .extent = last_included_range->end_point,
+    };
+    ts_lexer__clear_chunk(self);
+    self->lookahead_size = 1;
+    self->data.lookahead = '\0';
+  }
+}
+
+// Install a new input source. Any chunk cached from the previous input is
+// discarded and the lexer is re-positioned at its current position.
+void ts_lexer_set_input(Lexer *self, TSInput input) {
+  ts_lexer__clear_chunk(self);
+  self->input = input;
+
+  Length position = self->current_position;
+  ts_lexer_goto(self, position);
+}
+
+// Move the lexer to `position`. Nothing happens when the lexer is already at
+// that byte offset.
+void ts_lexer_reset(Lexer *self, Length position) {
+  if (self->current_position.bytes == position.bytes) return;
+  ts_lexer_goto(self, position);
+}
+
+// Prepare to lex a new token at the current position: reset the token
+// bounds and result symbol, make sure a chunk and a lookahead character are
+// loaded, and skip a byte-order mark found at the very start of the input.
+void ts_lexer_start(Lexer *self) {
+  self->data.result_symbol = 0;
+  self->token_end_position = LENGTH_UNDEFINED;
+  self->token_start_position = self->current_position;
+
+  if (ts_lexer__eof(&self->data)) return;
+
+  if (self->chunk_size == 0) ts_lexer__get_chunk(self);
+  if (self->lookahead_size == 0) ts_lexer__get_lookahead(self);
+
+  bool at_leading_bom =
+    self->current_position.bytes == 0 &&
+    self->data.lookahead == BYTE_ORDER_MARK;
+  if (at_leading_bom) {
+    ts_lexer__advance(&self->data, true);
+  }
+}
+
+// Finish lexing a token. If the token end was never marked it is marked
+// now, and `*lookahead_end_byte` is raised (never lowered) to the byte
+// offset just past the furthest byte this lexing attempt looked at.
+void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
+  if (length_is_undefined(self->token_end_position)) {
+    ts_lexer__mark_end(&self->data);
+  }
+
+  // The byte at the current position has been examined. For an invalid
+  // UTF8/UTF16 sequence the decoder may also have looked at the byte after
+  // it, so that byte influences the result too and is included.
+  uint32_t examined_end_byte = self->current_position.bytes + 1;
+  if (self->data.lookahead == TS_DECODE_ERROR) {
+    examined_end_byte += 1;
+  }
+
+  if (*lookahead_end_byte < examined_end_byte) {
+    *lookahead_end_byte = examined_end_byte;
+  }
+}
+
+// Consume characters until the lexer no longer has a chunk loaded.
+void ts_lexer_advance_to_end(Lexer *self) {
+  for (;;) {
+    if (self->chunk == NULL) break;
+    ts_lexer__advance(&self->data, false);
+  }
+}
+
+// Lexer-typed wrapper around ts_lexer__mark_end(): record the current
+// position as the end of the token being lexed.
+void ts_lexer_mark_end(Lexer *self) {
+  ts_lexer__mark_end(&self->data);
+}
+
+// Replace the lexer's included ranges with a copy of `ranges`.
+//
+// Passing NULL or a count of zero installs the single DEFAULT_RANGE.
+// Otherwise the ranges must be ordered and non-overlapping: each range must
+// start at or after the end of the previous one and must not end before it
+// starts. Returns false, leaving the lexer untouched, when they are not.
+bool ts_lexer_set_included_ranges(
+  Lexer *self,
+  const TSRange *ranges,
+  uint32_t count
+) {
+  bool use_default = !ranges || count == 0;
+
+  if (use_default) {
+    ranges = &DEFAULT_RANGE;
+    count = 1;
+  } else {
+    uint32_t previous_end = 0;
+    for (uint32_t index = 0; index < count; index++) {
+      const TSRange *current = ranges + index;
+      if (current->start_byte < previous_end) return false;
+      if (current->end_byte < current->start_byte) return false;
+      previous_end = current->end_byte;
+    }
+  }
+
+  // Keep a private copy, then re-validate the current position against the
+  // new ranges.
+  size_t byte_size = count * sizeof(TSRange);
+  self->included_ranges = ts_realloc(self->included_ranges, byte_size);
+  memcpy(self->included_ranges, ranges, byte_size);
+  self->included_range_count = count;
+  ts_lexer_goto(self, self->current_position);
+  return true;
+}
+
+// Return the lexer's own included-ranges array and store its length in
+// `*count`. The array is not copied.
+TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) {
+  TSRange *ranges = self->included_ranges;
+  *count = self->included_range_count;
+  return ranges;
+}
+
+#undef LOG
--- a/shlr/tree-sitter/lib/src/lexer.h
+++ b/shlr/tree-sitter/lib/src/lexer.h
@ -0,0 +1,48 @@
+#ifndef TREE_SITTER_LEXER_H_
+#define TREE_SITTER_LEXER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./length.h"
+#include "./subtree.h"
+#include "tree_sitter/api.h"
+#include "tree_sitter/parser.h"
+
+// Lexer state. The embedded TSLexer must stay the first member: lexer.c
+// casts TSLexer pointers back to Lexer pointers.
+typedef struct {
+  TSLexer data;                  // callbacks, lookahead and result symbol
+  Length current_position;       // where the next character will be read
+  Length token_start_position;   // start of the token being lexed
+  Length token_end_position;     // end of the token, LENGTH_UNDEFINED until marked
+
+  // Ranges of the input the lexer may read; the index equals the count once
+  // the lexer has reached EOF.
+  TSRange *included_ranges;
+  size_t included_range_count;
+  size_t current_included_range_index;
+
+  // Most recent chunk returned by the input callback.
+  const char *chunk;
+  uint32_t chunk_start;          // byte offset of the first byte of `chunk`
+  uint32_t chunk_size;           // number of bytes in `chunk`
+  uint32_t lookahead_size;       // bytes occupied by the lookahead character
+
+  TSInput input;
+  TSLogger logger;
+  char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];  // scratch space for log messages
+} Lexer;
+
+// Set up a lexer with default state and the default included range.
+void ts_lexer_init(Lexer *);
+// Free the lexer's included-ranges array.
+void ts_lexer_delete(Lexer *);
+// Install a new input source and drop any cached chunk.
+void ts_lexer_set_input(Lexer *, TSInput);
+// Move to the given position; no work is done if already at that byte.
+void ts_lexer_reset(Lexer *, Length);
+// Begin lexing a token at the current position.
+void ts_lexer_start(Lexer *);
+// Finish lexing a token; the uint32_t is raised to the end byte of the
+// lookahead that was examined.
+void ts_lexer_finish(Lexer *, uint32_t *);
+// Consume input until the lexer has no chunk left.
+void ts_lexer_advance_to_end(Lexer *);
+// Mark the current position as the end of the token.
+void ts_lexer_mark_end(Lexer *);
+// Replace the included ranges; returns false if the ranges are out of order
+// or overlap.
+bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count);
+// Return the included-ranges array and store its length in `*count`.
+TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_LEXER_H_
--- a/shlr/tree-sitter/lib/src/lib.c
+++ b/shlr/tree-sitter/lib/src/lib.c
@ -0,0 +1,17 @@
+// The Tree-sitter library can be built by compiling this one source file.
+//
+// The following directories must be added to the include path:
+//   - include
+
+#define _POSIX_C_SOURCE 200112L
+
+#include "./get_changed_ranges.c"
+#include "./language.c"
+#include "./lexer.c"
+#include "./node.c"
+#include "./parser.c"
+#include "./query.c"
+#include "./stack.c"
+#include "./subtree.c"
+#include "./tree_cursor.c"
+#include "./tree.c"
--- a/shlr/tree-sitter/lib/src/node.c
+++ b/shlr/tree-sitter/lib/src/node.c
@ -0,0 +1,675 @@
+#include <stdbool.h>
+#include "./subtree.h"
+#include "./tree.h"
+#include "./language.h"
+
+// Cursor over the direct children of a node, used to build a TSNode for each
+// child in order.
+typedef struct {
+  Subtree parent;                   // subtree being iterated; NULL_SUBTREE if it has no children
+  const TSTree *tree;               // tree that the produced nodes belong to
+  Length position;                  // running position, advanced past each child returned
+  uint32_t child_index;             // index of the next child to return
+  uint32_t structural_child_index;  // like child_index, but not counting "extra" children
+  const TSSymbol *alias_sequence;   // aliases for the parent's production, or NULL
+} NodeChildIterator;
+
+// TSNode - constructors
+
+// Build a TSNode. The four context slots hold, in order, the start byte,
+// start row, start column and alias symbol.
+TSNode ts_node_new(
+  const TSTree *tree,
+  const Subtree *subtree,
+  Length position,
+  TSSymbol alias
+) {
+  TSNode node = {
+    {position.bytes, position.extent.row, position.extent.column, alias},
+    subtree,
+    tree,
+  };
+  return node;
+}
+
+// The null node: no tree, no subtree, zero position, no alias.
+static inline TSNode ts_node__null(void) {
+  Length origin = length_zero();
+  return ts_node_new(NULL, NULL, origin, 0);
+}
+
+// TSNode - accessors
+
+// Byte offset at which the node starts (context slot 0).
+uint32_t ts_node_start_byte(TSNode self) {
+  uint32_t start_byte = self.context[0];
+  return start_byte;
+}
+
+// Row/column at which the node starts (context slots 1 and 2).
+TSPoint ts_node_start_point(TSNode self) {
+  TSPoint start = {self.context[1], self.context[2]};
+  return start;
+}
+
+// Alias symbol stored in the node (context slot 3); 0 means no alias.
+static inline uint32_t ts_node__alias(const TSNode *self) {
+  uint32_t alias = self->context[3];
+  return alias;
+}
+
+// The subtree that the node's `id` points at, returned by value.
+static inline Subtree ts_node__subtree(TSNode self) {
+  const Subtree *subtree = (const Subtree *)self.id;
+  return *subtree;
+}
+
+// NodeChildIterator
+
+// Create an iterator over the direct children of `node`. A node without
+// children gets an iterator with a null parent, which yields nothing.
+static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
+  Subtree subtree = ts_node__subtree(*node);
+
+  if (ts_subtree_child_count(subtree) == 0) {
+    NodeChildIterator empty = {
+      .parent = NULL_SUBTREE,
+      .tree = node->tree,
+      .position = length_zero(),
+      .child_index = 0,
+      .structural_child_index = 0,
+      .alias_sequence = NULL,
+    };
+    return empty;
+  }
+
+  // Iteration starts at the node's own start position.
+  NodeChildIterator iterator = {
+    .parent = subtree,
+    .tree = node->tree,
+    .position = {ts_node_start_byte(*node), ts_node_start_point(*node)},
+    .child_index = 0,
+    .structural_child_index = 0,
+    .alias_sequence = ts_language_alias_sequence(
+      node->tree->language,
+      subtree.ptr->production_id
+    ),
+  };
+  return iterator;
+}
+
+// True once every child of the parent has been returned. The parent must be
+// non-null.
+static inline bool ts_node_child_iterator_done(NodeChildIterator *self) {
+  uint32_t total_children = self->parent.ptr->child_count;
+  return self->child_index == total_children;
+}
+
+// Produce the next child of the iterator's parent in `*result` and advance
+// the iterator. Returns false, leaving `*result` untouched, when the
+// iterator has no parent or is exhausted.
+static inline bool ts_node_child_iterator_next(
+  NodeChildIterator *self,
+  TSNode *result
+) {
+  if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false;
+  const Subtree *child = &self->parent.ptr->children[self->child_index];
+  TSSymbol alias_symbol = 0;
+  // "Extra" children take no slot in the alias sequence, so only the other
+  // children consume an entry and advance the structural index.
+  if (!ts_subtree_extra(*child)) {
+    if (self->alias_sequence) {
+      alias_symbol = self->alias_sequence[self->structural_child_index];
+    }
+    self->structural_child_index++;
+  }
+  // The first child's padding is not added: the iterator starts at the
+  // parent node's start position.
+  if (self->child_index > 0) {
+    self->position = length_add(self->position, ts_subtree_padding(*child));
+  }
+  *result = ts_node_new(
+    self->tree,
+    child,
+    self->position,
+    alias_symbol
+  );
+  // Leave the iterator positioned at the end of the child just returned.
+  self->position = length_add(self->position, ts_subtree_size(*child));
+  self->child_index++;
+  return true;
+}
+
+// TSNode - private
+
+// Decide whether a node counts for the public child/sibling API.
+// With `include_anonymous`, any visible or aliased node counts. Without it,
+// only named nodes count: an aliased node is judged by its alias' metadata,
+// any other node by its own visible and named flags.
+static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
+  Subtree tree = ts_node__subtree(self);
+  uint32_t raw_alias = ts_node__alias(&self);
+
+  if (include_anonymous) {
+    return ts_subtree_visible(tree) || raw_alias;
+  }
+
+  TSSymbol alias = raw_alias;
+  if (!alias) {
+    return ts_subtree_visible(tree) && ts_subtree_named(tree);
+  }
+  return ts_language_symbol_metadata(self.tree->language, alias).named;
+}
+
+// Number of relevant children recorded on the node's subtree: the visible
+// child count when anonymous nodes are included, otherwise the named child
+// count. A subtree without children reports 0.
+static inline uint32_t ts_node__relevant_child_count(
+  TSNode self,
+  bool include_anonymous
+) {
+  Subtree tree = ts_node__subtree(self);
+  if (ts_subtree_child_count(tree) == 0) return 0;
+
+  return include_anonymous
+    ? tree.ptr->visible_child_count
+    : tree.ptr->named_child_count;
+}
+
+// Return the relevant child of `self` at `child_index`, or the null node if
+// there is none. Children that are not relevant themselves are looked
+// through: their relevant children count as if they belonged to `self`.
+static inline TSNode ts_node__child(
+  TSNode self,
+  uint32_t child_index,
+  bool include_anonymous
+) {
+  TSNode result = self;
+  bool did_descend = true;
+
+  // Each pass scans the children of `result`; the loop repeats only after
+  // descending into a non-relevant child that contains the target.
+  while (did_descend) {
+    did_descend = false;
+
+    TSNode child;
+    uint32_t index = 0;
+    NodeChildIterator iterator = ts_node_iterate_children(&result);
+    while (ts_node_child_iterator_next(&iterator, &child)) {
+      if (ts_node__is_relevant(child, include_anonymous)) {
+        if (index == child_index) {
+          // Record `self` as the parent of the child being returned.
+          ts_tree_set_cached_parent(self.tree, &child, &self);
+          return child;
+        }
+        index++;
+      } else {
+        // A non-relevant child contributes its own relevant children. If
+        // the target index falls among them, continue the search inside it
+        // with the index rebased.
+        uint32_t grandchild_index = child_index - index;
+        uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous);
+        if (grandchild_index < grandchild_count) {
+          did_descend = true;
+          result = child;
+          child_index = grandchild_index;
+          break;
+        }
+        index += grandchild_count;
+      }
+    }
+  }
+
+  return ts_node__null();
+}
+
+// Does `other` appear among the trailing zero-byte descendants of `self`?
+// Children are scanned from last to first; the scan stops at the first child
+// that occupies any bytes, and recurses into each empty child.
+static bool ts_subtree_has_trailing_empty_descendant(
+  Subtree self,
+  Subtree other
+) {
+  for (unsigned remaining = ts_subtree_child_count(self); remaining > 0; remaining--) {
+    Subtree child = self.ptr->children[remaining - 1];
+    if (ts_subtree_total_bytes(child) > 0) return false;
+    if (child.ptr == other.ptr) return true;
+    if (ts_subtree_has_trailing_empty_descendant(child, other)) return true;
+  }
+  return false;
+}
+
+// Find the closest relevant node that precedes `self`, or the null node if
+// there is none. The search starts at the node returned by ts_node_parent()
+// and walks downward, remembering the best earlier candidate seen so far.
+static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) {
+  Subtree self_subtree = ts_node__subtree(self);
+  bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0;
+  uint32_t target_end_byte = ts_node_end_byte(self);
+
+  TSNode node = ts_node_parent(self);
+  TSNode earlier_node = ts_node__null();
+  bool earlier_node_is_relevant = false;
+
+  while (!ts_node_is_null(node)) {
+    TSNode earlier_child = ts_node__null();
+    bool earlier_child_is_relevant = false;
+    bool found_child_containing_target = false;
+
+    TSNode child;
+    NodeChildIterator iterator = ts_node_iterate_children(&node);
+    while (ts_node_child_iterator_next(&iterator, &child)) {
+      // Reaching the target itself ends the scan of this level.
+      if (child.id == self.id) break;
+      // After a child is returned, iterator.position is that child's end.
+      // A child ending after the target's end must contain the target.
+      if (iterator.position.bytes > target_end_byte) {
+        found_child_containing_target = true;
+        break;
+      }
+
+      // A child ending exactly at the target's end contains the target,
+      // unless the target is empty; an empty target is only contained if it
+      // is one of the child's trailing empty descendants.
+      if (iterator.position.bytes == target_end_byte &&
+          (!self_is_empty ||
+           ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree))) {
+        found_child_containing_target = true;
+        break;
+      }
+
+      // Remember the latest child before the target that either is relevant
+      // or has relevant children.
+      if (ts_node__is_relevant(child, include_anonymous)) {
+        earlier_child = child;
+        earlier_child_is_relevant = true;
+      } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
+        earlier_child = child;
+        earlier_child_is_relevant = false;
+      }
+    }
+
+    if (found_child_containing_target) {
+      // Descend toward the target, keeping the best candidate seen so far.
+      if (!ts_node_is_null(earlier_child)) {
+        earlier_node = earlier_child;
+        earlier_node_is_relevant = earlier_child_is_relevant;
+      }
+      node = child;
+    } else if (earlier_child_is_relevant) {
+      return earlier_child;
+    } else if (!ts_node_is_null(earlier_child)) {
+      // The candidate is not relevant itself but has relevant children:
+      // search inside it.
+      node = earlier_child;
+    } else if (earlier_node_is_relevant) {
+      return earlier_node;
+    } else {
+      node = earlier_node;
+    }
+  }
+
+  return ts_node__null();
+}
+
+// Finds the node that follows `self` among its siblings.
+//
+// The search starts at `ts_node_parent(self)` and scans that node's children
+// in order, skipping every child that ends before the end of `self`. Children
+// that are not "relevant" (per `ts_node__is_relevant` with the given
+// `include_anonymous` flag) are not returned themselves; the search descends
+// into them when they report relevant children.
+//
+// Returns the null node when no following sibling is found.
+static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) {
+  uint32_t target_end_byte = ts_node_end_byte(self);
+
+  TSNode node = ts_node_parent(self);
+  // Best candidate remembered from an enclosing level, used as a fallback when
+  // a deeper level yields nothing.
+  TSNode later_node = ts_node__null();
+  bool later_node_is_relevant = false;
+
+  while (!ts_node_is_null(node)) {
+    TSNode later_child = ts_node__null();
+    bool later_child_is_relevant = false;
+    TSNode child_containing_target = ts_node__null();
+
+    TSNode child;
+    NodeChildIterator iterator = ts_node_iterate_children(&node);
+    while (ts_node_child_iterator_next(&iterator, &child)) {
+      // Children that end before `self` ends cannot contain or follow it.
+      if (iterator.position.bytes < target_end_byte) continue;
+      if (ts_node_start_byte(child) <= ts_node_start_byte(self)) {
+        // The child starts at or before `self` and ends at or after it. Unless
+        // it is backed by the same subtree as `self`, record it as the child
+        // containing the target.
+        if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) {
+          child_containing_target = child;
+        }
+      } else if (ts_node__is_relevant(child, include_anonymous)) {
+        later_child = child;
+        later_child_is_relevant = true;
+        break;
+      } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
+        later_child = child;
+        later_child_is_relevant = false;
+        break;
+      }
+    }
+
+    if (!ts_node_is_null(child_containing_target)) {
+      // Remember this level's candidate, then keep looking inside the child
+      // that contains the target.
+      if (!ts_node_is_null(later_child)) {
+        later_node = later_child;
+        later_node_is_relevant = later_child_is_relevant;
+      }
+      node = child_containing_target;
+    } else if (later_child_is_relevant) {
+      return later_child;
+    } else if (!ts_node_is_null(later_child)) {
+      // The candidate is not relevant but has relevant children: search it.
+      node = later_child;
+    } else if (later_node_is_relevant) {
+      return later_node;
+    } else {
+      // Fall back to the remembered candidate (possibly null, which ends the
+      // loop).
+      node = later_node;
+    }
+  }
+
+  return ts_node__null();
+}
+
+// Returns the first relevant node under `self` whose end byte lies beyond
+// `goal`. Non-relevant children ending beyond `goal` are entered when they
+// have children of their own. Yields the null node when nothing qualifies.
+static inline TSNode ts_node__first_child_for_byte(
+  TSNode self,
+  uint32_t goal,
+  bool include_anonymous
+) {
+  TSNode current = self;
+
+  for (;;) {
+    bool descended = false;
+
+    TSNode candidate;
+    NodeChildIterator children = ts_node_iterate_children(&current);
+    while (!descended && ts_node_child_iterator_next(&children, &candidate)) {
+      // Children that end at or before the goal are of no interest.
+      if (ts_node_end_byte(candidate) <= goal) continue;
+
+      if (ts_node__is_relevant(candidate, include_anonymous)) return candidate;
+
+      if (ts_node_child_count(candidate) > 0) {
+        current = candidate;
+        descended = true;
+      }
+    }
+
+    if (!descended) return ts_node__null();
+  }
+}
+
+// Descends from `self` to the deepest relevant node that spans the byte range
+// [range_start, range_end]. Each relevant node entered on the way down is
+// recorded in the tree's parent cache. Returns `self` when no relevant
+// descendant spans the range.
+static inline TSNode ts_node__descendant_for_byte_range(
+  TSNode self,
+  uint32_t range_start,
+  uint32_t range_end,
+  bool include_anonymous
+) {
+  TSNode current = self;
+  TSNode deepest_visible = self;
+
+  for (bool descended = true; descended;) {
+    descended = false;
+
+    TSNode candidate;
+    NodeChildIterator children = ts_node_iterate_children(&current);
+    while (ts_node_child_iterator_next(&children, &candidate)) {
+      uint32_t candidate_end = children.position.bytes;
+
+      // Skip children that stop short of the range's end, or that do not
+      // extend past the range's start.
+      if (candidate_end < range_end || candidate_end <= range_start) continue;
+
+      // A child that begins after the range's start cannot span it, and
+      // neither can any later child.
+      if (range_start < ts_node_start_byte(candidate)) break;
+
+      current = candidate;
+      if (ts_node__is_relevant(current, include_anonymous)) {
+        ts_tree_set_cached_parent(self.tree, &candidate, &deepest_visible);
+        deepest_visible = current;
+      }
+      descended = true;
+      break;
+    }
+  }
+
+  return deepest_visible;
+}
+
+// Descends from `self` to the deepest relevant node that spans the point
+// range [range_start, range_end]. Each relevant node entered on the way down
+// is recorded in the tree's parent cache. Returns `self` when no relevant
+// descendant spans the range.
+static inline TSNode ts_node__descendant_for_point_range(
+  TSNode self,
+  TSPoint range_start,
+  TSPoint range_end,
+  bool include_anonymous
+) {
+  TSNode current = self;
+  TSNode deepest_visible = self;
+
+  for (bool descended = true; descended;) {
+    descended = false;
+
+    TSNode candidate;
+    NodeChildIterator children = ts_node_iterate_children(&current);
+    while (ts_node_child_iterator_next(&children, &candidate)) {
+      TSPoint candidate_end = children.position.extent;
+
+      // Skip children that stop short of the range's end, or that do not
+      // extend past the range's start.
+      if (point_lt(candidate_end, range_end) || point_lte(candidate_end, range_start)) continue;
+
+      // A child that begins after the range's start cannot span it, and
+      // neither can any later child.
+      if (point_lt(range_start, ts_node_start_point(candidate))) break;
+
+      current = candidate;
+      if (ts_node__is_relevant(current, include_anonymous)) {
+        ts_tree_set_cached_parent(self.tree, &candidate, &deepest_visible);
+        deepest_visible = current;
+      }
+      descended = true;
+      break;
+    }
+  }
+
+  return deepest_visible;
+}
+
+// TSNode - public
+
+// Byte offset at which the node ends: its start byte plus its subtree's size.
+uint32_t ts_node_end_byte(TSNode self) {
+  uint32_t start = ts_node_start_byte(self);
+  uint32_t length = ts_subtree_size(ts_node__subtree(self)).bytes;
+  return start + length;
+}
+
+// Point at which the node ends: its start point advanced by its subtree's
+// extent.
+TSPoint ts_node_end_point(TSNode self) {
+  TSPoint start = ts_node_start_point(self);
+  TSPoint extent = ts_subtree_size(ts_node__subtree(self)).extent;
+  return point_add(start, extent);
+}
+
+// Returns the node's public symbol. A nonzero alias takes priority over the
+// symbol stored in the node's subtree.
+TSSymbol ts_node_symbol(TSNode self) {
+  TSSymbol alias = ts_node__alias(&self);
+  TSSymbol resolved = alias ? alias : ts_subtree_symbol(ts_node__subtree(self));
+  return ts_language_public_symbol(self.tree->language, resolved);
+}
+
+// Returns the name of the node's symbol. A nonzero alias takes priority over
+// the symbol stored in the node's subtree.
+const char *ts_node_type(TSNode self) {
+  TSSymbol alias = ts_node__alias(&self);
+  TSSymbol resolved = alias ? alias : ts_subtree_symbol(ts_node__subtree(self));
+  return ts_language_symbol_name(self.tree->language, resolved);
+}
+
+// Renders the node's subtree as a string via `ts_subtree_string`.
+char *ts_node_string(TSNode self) {
+  Subtree subtree = ts_node__subtree(self);
+  return ts_subtree_string(subtree, self.tree->language, false);
+}
+
+// Two nodes are equal when they belong to the same tree and share an id.
+bool ts_node_eq(TSNode self, TSNode other) {
+  if (self.tree != other.tree) return false;
+  return self.id == other.id;
+}
+
+// A node is null when its id is zero.
+bool ts_node_is_null(TSNode self) {
+  return !self.id;
+}
+
+// Reports the `extra` flag of the node's subtree.
+bool ts_node_is_extra(TSNode self) {
+  Subtree subtree = ts_node__subtree(self);
+  return ts_subtree_extra(subtree);
+}
+
+// Reports whether the node is named. When the node carries an alias, the
+// alias symbol's metadata decides; otherwise the subtree's own flag is used.
+bool ts_node_is_named(TSNode self) {
+  TSSymbol alias = ts_node__alias(&self);
+  if (alias) {
+    return ts_language_symbol_metadata(self.tree->language, alias).named;
+  }
+  return ts_subtree_named(ts_node__subtree(self));
+}
+
+// Reports the `missing` flag of the node's subtree.
+bool ts_node_is_missing(TSNode self) {
+  Subtree subtree = ts_node__subtree(self);
+  return ts_subtree_missing(subtree);
+}
+
+// Reports the `has_changes` flag of the node's subtree.
+bool ts_node_has_changes(TSNode self) {
+  Subtree subtree = ts_node__subtree(self);
+  return ts_subtree_has_changes(subtree);
+}
+
+bool ts_node_has_error(TSNode self) {
+  return ts_subtree_error_cost(ts_node__subtree(self)) > 0;
+}
+
+// Returns the closest relevant ancestor of `self`, or the null node when
+// `self` is the tree's root.
+//
+// A cached answer is used when the tree has one. Otherwise the tree is walked
+// downward from the root towards `self`, and each relevant node entered along
+// the way is stored in the tree's parent cache.
+TSNode ts_node_parent(TSNode self) {
+  TSNode node = ts_tree_get_cached_parent(self.tree, &self);
+  if (node.id) return node;
+
+  node = ts_tree_root_node(self.tree);
+  uint32_t end_byte = ts_node_end_byte(self);
+  if (node.id == self.id) return ts_node__null();
+
+  TSNode last_visible_node = node;
+  bool did_descend = true;
+  while (did_descend) {
+    did_descend = false;
+
+    TSNode child;
+    NodeChildIterator iterator = ts_node_iterate_children(&node);
+    while (ts_node_child_iterator_next(&iterator, &child)) {
+      // Stop at this level once a child starts after `self`, or once `self`
+      // itself is reached: the current `last_visible_node` is the answer.
+      if (
+        ts_node_start_byte(child) > ts_node_start_byte(self) ||
+        child.id == self.id
+      ) break;
+      // The child ends at or after the end of `self`: descend into it.
+      if (iterator.position.bytes >= end_byte) {
+        node = child;
+        if (ts_node__is_relevant(child, true)) {
+          ts_tree_set_cached_parent(self.tree, &node, &last_visible_node);
+          last_visible_node = node;
+        }
+        did_descend = true;
+        break;
+      }
+    }
+  }
+
+  return last_visible_node;
+}
+
+// Public wrapper around `ts_node__child` that passes `true` as the helper's
+// flag argument (presumably `include_anonymous`; the helper's signature is
+// defined earlier in the file — confirm there).
+TSNode ts_node_child(TSNode self, uint32_t child_index) {
+  return ts_node__child(self, child_index, true);
+}
+
+// Public wrapper around `ts_node__child` that passes `false` as the helper's
+// flag argument (presumably `include_anonymous`; the helper's signature is
+// defined earlier in the file — confirm there).
+TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
+  return ts_node__child(self, child_index, false);
+}
+
+// Returns the child of `self` associated with `field_id`.
+//
+// Returns the null node when `field_id` is zero, when `self` has no children,
+// when the production's field map has no entry for `field_id`, or when no
+// matching child is found.
+TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) {
+recur:
+  if (!field_id || ts_node_child_count(self) == 0) return ts_node__null();
+
+  const TSFieldMapEntry *field_map, *field_map_end;
+  ts_language_field_map(
+    self.tree->language,
+    ts_node__subtree(self).ptr->production_id,
+    &field_map,
+    &field_map_end
+  );
+  if (field_map == field_map_end) return ts_node__null();
+
+  // The field mappings are sorted by their field id. Scan all
+  // the mappings to find the ones for the given field id.
+  while (field_map->field_id < field_id) {
+    field_map++;
+    if (field_map == field_map_end) return ts_node__null();
+  }
+  while (field_map_end[-1].field_id > field_id) {
+    field_map_end--;
+    if (field_map == field_map_end) return ts_node__null();
+  }
+
+  TSNode child;
+  NodeChildIterator iterator = ts_node_iterate_children(&self);
+  while (ts_node_child_iterator_next(&iterator, &child)) {
+    // Extra children are skipped when matching against the field map.
+    if (!ts_subtree_extra(ts_node__subtree(child))) {
+      // Index of the child just returned by the iterator, counted among
+      // structural children (the iterator has already advanced past it).
+      uint32_t index = iterator.structural_child_index - 1;
+      if (index < field_map->child_index) continue;
+
+      // Hidden nodes' fields are "inherited" by their visible parent.
+      if (field_map->inherited) {
+
+        // If this is the *last* possible child node for this field,
+        // then perform a tail call to avoid recursion.
+        if (field_map + 1 == field_map_end) {
+          self = child;
+          goto recur;
+        }
+
+        // Otherwise, descend into this child, but if it doesn't contain
+        // the field, continue searching subsequent children.
+        else {
+          TSNode result = ts_node_child_by_field_id(child, field_id);
+          if (result.id) return result;
+          field_map++;
+          if (field_map == field_map_end) return ts_node__null();
+        }
+      }
+
+      else if (ts_node__is_relevant(child, true)) {
+        return child;
+      }
+
+      // If the field refers to a hidden node, return its first visible
+      // child.
+      else {
+        return ts_node_child(child, 0);
+      }
+    }
+  }
+
+  return ts_node__null();
+}
+
+// Looks up a child by field name: the name (with explicit length) is resolved
+// to a field id through the tree's language, and the lookup is delegated to
+// `ts_node_child_by_field_id`.
+TSNode ts_node_child_by_field_name(
+  TSNode self,
+  const char *name,
+  uint32_t name_length
+) {
+  TSFieldId id = ts_language_field_id_for_name(self.tree->language, name, name_length);
+  return ts_node_child_by_field_id(self, id);
+}
+
+// Number of visible children recorded on the node's subtree, or zero when the
+// subtree has no children at all.
+uint32_t ts_node_child_count(TSNode self) {
+  Subtree subtree = ts_node__subtree(self);
+  return ts_subtree_child_count(subtree) > 0 ? subtree.ptr->visible_child_count : 0;
+}
+
+// Number of named children recorded on the node's subtree, or zero when the
+// subtree has no children at all.
+uint32_t ts_node_named_child_count(TSNode self) {
+  Subtree subtree = ts_node__subtree(self);
+  return ts_subtree_child_count(subtree) > 0 ? subtree.ptr->named_child_count : 0;
+}
+
+// Next sibling, with anonymous nodes included in the search.
+TSNode ts_node_next_sibling(TSNode self) {
+  const bool include_anonymous = true;
+  return ts_node__next_sibling(self, include_anonymous);
+}
+
+// Next sibling, with anonymous nodes excluded from the search.
+TSNode ts_node_next_named_sibling(TSNode self) {
+  const bool include_anonymous = false;
+  return ts_node__next_sibling(self, include_anonymous);
+}
+
+// Previous sibling, with anonymous nodes included in the search.
+TSNode ts_node_prev_sibling(TSNode self) {
+  const bool include_anonymous = true;
+  return ts_node__prev_sibling(self, include_anonymous);
+}
+
+// Previous sibling, with anonymous nodes excluded from the search.
+TSNode ts_node_prev_named_sibling(TSNode self) {
+  const bool include_anonymous = false;
+  return ts_node__prev_sibling(self, include_anonymous);
+}
+
+// First child ending beyond `byte`, with anonymous nodes included.
+TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte) {
+  const bool include_anonymous = true;
+  return ts_node__first_child_for_byte(self, byte, include_anonymous);
+}
+
+// First child ending beyond `byte`, with anonymous nodes excluded.
+TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte) {
+  const bool include_anonymous = false;
+  return ts_node__first_child_for_byte(self, byte, include_anonymous);
+}
+
+// Deepest descendant spanning the byte range, with anonymous nodes included.
+TSNode ts_node_descendant_for_byte_range(
+  TSNode self,
+  uint32_t start,
+  uint32_t end
+) {
+  const bool include_anonymous = true;
+  return ts_node__descendant_for_byte_range(self, start, end, include_anonymous);
+}
+
+// Deepest descendant spanning the byte range, with anonymous nodes excluded.
+TSNode ts_node_named_descendant_for_byte_range(
+  TSNode self,
+  uint32_t start,
+  uint32_t end
+) {
+  const bool include_anonymous = false;
+  return ts_node__descendant_for_byte_range(self, start, end, include_anonymous);
+}
+
+// Deepest descendant spanning the point range, with anonymous nodes included.
+TSNode ts_node_descendant_for_point_range(
+  TSNode self,
+  TSPoint start,
+  TSPoint end
+) {
+  const bool include_anonymous = true;
+  return ts_node__descendant_for_point_range(self, start, end, include_anonymous);
+}
+
+// Deepest descendant spanning the point range, with anonymous nodes excluded.
+TSNode ts_node_named_descendant_for_point_range(
+  TSNode self,
+  TSPoint start,
+  TSPoint end
+) {
+  const bool include_anonymous = false;
+  return ts_node__descendant_for_point_range(self, start, end, include_anonymous);
+}
+
+// Adjusts the start position stored in `self->context` to account for `edit`.
+//
+// - A node starting at or after the edit's old end is shifted by the edit's
+//   size change.
+// - A node starting strictly inside the edited span is moved to the edit's
+//   new end.
+// - A node starting at or before the edit's start is left where it is.
+void ts_node_edit(TSNode *self, const TSInputEdit *edit) {
+  uint32_t byte = ts_node_start_byte(*self);
+  TSPoint point = ts_node_start_point(*self);
+
+  if (byte < edit->old_end_byte) {
+    if (byte > edit->start_byte) {
+      byte = edit->new_end_byte;
+      point = edit->new_end_point;
+    }
+  } else {
+    point = point_add(edit->new_end_point, point_sub(point, edit->old_end_point));
+    byte = edit->new_end_byte + (byte - edit->old_end_byte);
+  }
+
+  self->context[0] = byte;
+  self->context[1] = point.row;
+  self->context[2] = point.column;
+}
--- a/shlr/tree-sitter/lib/src/parser.c
+++ b/shlr/tree-sitter/lib/src/parser.c
--- a/shlr/tree-sitter/lib/src/point.h
+++ b/shlr/tree-sitter/lib/src/point.h
@ -0,0 +1,54 @@
+#ifndef TREE_SITTER_POINT_H_
+#define TREE_SITTER_POINT_H_
+
+#include "tree_sitter/api.h"
+
+// The point at row 0, column 0.
+#define POINT_ZERO ((TSPoint) {0, 0})
+// The largest representable point: both fields set to UINT32_MAX.
+#define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX})
+
+// Builds a point from a row and a column.
+static inline TSPoint point__new(unsigned row, unsigned column) {
+  return (TSPoint) {row, column};
+}
+
+// Advances point `a` by extent `b`. When `b` spans no rows the columns are
+// summed; otherwise the rows are summed and the column is taken from `b`.
+static inline TSPoint point_add(TSPoint a, TSPoint b) {
+  if (b.row == 0) {
+    return point__new(a.row, a.column + b.column);
+  }
+  return point__new(a.row + b.row, b.column);
+}
+
+// Computes the extent from `b` to `a`. When `a` is on a later row than `b`
+// the row difference is kept along with `a`'s column; otherwise the result is
+// on row 0 with the column difference.
+static inline TSPoint point_sub(TSPoint a, TSPoint b) {
+  if (a.row <= b.row) {
+    return point__new(0, a.column - b.column);
+  }
+  return point__new(a.row - b.row, a.column);
+}
+
+// True when `a` is at or before `b`, comparing rows first and then columns.
+static inline bool point_lte(TSPoint a, TSPoint b) {
+  if (a.row != b.row) return a.row < b.row;
+  return a.column <= b.column;
+}
+
+// True when `a` is strictly before `b`, comparing rows first and then
+// columns.
+static inline bool point_lt(TSPoint a, TSPoint b) {
+  if (a.row != b.row) return a.row < b.row;
+  return a.column < b.column;
+}
+
+// True when both points have the same row and the same column.
+static inline bool point_eq(TSPoint a, TSPoint b) {
+  if (a.row != b.row) return false;
+  return a.column == b.column;
+}
+
+// Returns the earlier of the two points; `b` is returned when they are equal.
+static inline TSPoint point_min(TSPoint a, TSPoint b) {
+  return point_lt(a, b) ? a : b;
+}
+
+// Returns the later of the two points; `b` is returned when they are equal.
+static inline TSPoint point_max(TSPoint a, TSPoint b) {
+  return point_lt(b, a) ? a : b;
+}
+
+#endif
--- a/shlr/tree-sitter/lib/src/query.c
+++ b/shlr/tree-sitter/lib/src/query.c
--- a/shlr/tree-sitter/lib/src/reduce_action.h
+++ b/shlr/tree-sitter/lib/src/reduce_action.h
@ -0,0 +1,34 @@
+#ifndef TREE_SITTER_REDUCE_ACTION_H_
+#define TREE_SITTER_REDUCE_ACTION_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./array.h"
+#include "tree_sitter/api.h"
+
+// Description of a single reduce action. Two actions are treated as
+// duplicates by `ts_reduce_action_set_add` when their `symbol` and `count`
+// match; the remaining fields do not take part in that comparison.
+typedef struct {
+  uint32_t count;
+  TSSymbol symbol;
+  int dynamic_precedence;
+  unsigned short production_id;
+} ReduceAction;
+
+// Growable array of reduce actions, kept free of duplicates by
+// `ts_reduce_action_set_add`.
+typedef Array(ReduceAction) ReduceActionSet;
+
+// Appends `new_action` to the set unless an entry with the same symbol and
+// the same count is already present.
+static inline void ts_reduce_action_set_add(ReduceActionSet *self,
+                                            ReduceAction new_action) {
+  uint32_t index = 0;
+  while (index < self->size) {
+    const ReduceAction *existing = &self->contents[index++];
+    bool same_symbol = existing->symbol == new_action.symbol;
+    bool same_count = existing->count == new_action.count;
+    if (same_symbol && same_count) return;
+  }
+  array_push(self, new_action);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_REDUCE_ACTION_H_
--- a/shlr/tree-sitter/lib/src/reusable_node.h
+++ b/shlr/tree-sitter/lib/src/reusable_node.h
@ -0,0 +1,88 @@
+#include "./subtree.h"
+
+// One level of the path from the root of a tree to the current subtree.
+typedef struct {
+  Subtree tree;          // subtree at this level
+  uint32_t child_index;  // index of `tree` within the entry below it on the stack
+  uint32_t byte_offset;  // byte offset at which `tree` begins
+} StackEntry;
+
+// Cursor over a tree, stored as a stack of entries whose last element is the
+// current subtree.
+typedef struct {
+  Array(StackEntry) stack;
+  // Updated by `reusable_node_advance` when the subtree being left behind
+  // has external tokens.
+  Subtree last_external_token;
+} ReusableNode;
+
+// Creates a cursor with an empty stack and no external token.
+static inline ReusableNode reusable_node_new(void) {
+  ReusableNode node = {
+    .stack = array_new(),
+    .last_external_token = NULL_SUBTREE,
+  };
+  return node;
+}
+
+// Empties the cursor's stack and forgets the last external token.
+static inline void reusable_node_clear(ReusableNode *self) {
+  array_clear(&self->stack);
+  self->last_external_token = NULL_SUBTREE;
+}
+
+// Clears the cursor and points it at `tree` as the single entry on the stack,
+// at child index 0 and byte offset 0.
+static inline void reusable_node_reset(ReusableNode *self, Subtree tree) {
+  reusable_node_clear(self);
+  StackEntry root_entry = {
+    .tree = tree,
+    .child_index = 0,
+    .byte_offset = 0,
+  };
+  array_push(&self->stack, root_entry);
+}
+
+// Returns the subtree on top of the cursor's stack, or NULL_SUBTREE when the
+// stack is empty.
+static inline Subtree reusable_node_tree(ReusableNode *self) {
+  if (self->stack.size == 0) return NULL_SUBTREE;
+  return self->stack.contents[self->stack.size - 1].tree;
+}
+
+// Returns the byte offset of the entry on top of the cursor's stack, or
+// UINT32_MAX when the stack is empty.
+static inline uint32_t reusable_node_byte_offset(ReusableNode *self) {
+  if (self->stack.size == 0) return UINT32_MAX;
+  return self->stack.contents[self->stack.size - 1].byte_offset;
+}
+
+// Releases the storage held by the cursor's stack.
+static inline void reusable_node_delete(ReusableNode *self) {
+  array_delete(&self->stack);
+}
+
+// Moves the cursor past the current subtree to the next one in the tree.
+//
+// Entries are popped until one is found whose parent has a further child;
+// that child is then pushed, starting at the byte offset where the subtree
+// being left behind ended. When the stack runs empty the cursor is left
+// empty.
+static inline void reusable_node_advance(ReusableNode *self) {
+  StackEntry current = *array_back(&self->stack);
+  uint32_t next_offset = current.byte_offset + ts_subtree_total_bytes(current.tree);
+  if (ts_subtree_has_external_tokens(current.tree)) {
+    self->last_external_token = ts_subtree_last_external_token(current.tree);
+  }
+
+  for (;;) {
+    StackEntry popped = array_pop(&self->stack);
+    uint32_t sibling_index = popped.child_index + 1;
+    if (self->stack.size == 0) return;
+
+    Subtree parent = array_back(&self->stack)->tree;
+    if (sibling_index < ts_subtree_child_count(parent)) {
+      StackEntry sibling = {
+        .tree = parent.ptr->children[sibling_index],
+        .child_index = sibling_index,
+        .byte_offset = next_offset,
+      };
+      array_push(&self->stack, sibling);
+      return;
+    }
+  }
+}
+
+// Steps into the first child of the current subtree, keeping the same byte
+// offset. Returns false, leaving the cursor untouched, when the current
+// subtree has no children.
+static inline bool reusable_node_descend(ReusableNode *self) {
+  StackEntry current = *array_back(&self->stack);
+  if (!(ts_subtree_child_count(current.tree) > 0)) return false;
+
+  StackEntry first_child = {
+    .tree = current.tree.ptr->children[0],
+    .child_index = 0,
+    .byte_offset = current.byte_offset,
+  };
+  array_push(&self->stack, first_child);
+  return true;
+}
+
+// Descends through first children until no further descent is possible, then
+// advances past the subtree reached.
+static inline void reusable_node_advance_past_leaf(ReusableNode *self) {
+  bool descended;
+  do {
+    descended = reusable_node_descend(self);
+  } while (descended);
+  reusable_node_advance(self);
+}
--- a/shlr/tree-sitter/lib/src/stack.c
+++ b/shlr/tree-sitter/lib/src/stack.c
@ -0,0 +1,846 @@
+#include "./alloc.h"
+#include "./language.h"
+#include "./subtree.h"
+#include "./array.h"
+#include "./stack.h"
+#include "./length.h"
+#include <assert.h>
+#include <stdio.h>
+
+// Capacity of each node's `links` array; further links are silently dropped.
+#define MAX_LINK_COUNT 8
+// Upper bound on the number of released nodes kept for reuse in the node pool.
+#define MAX_NODE_POOL_SIZE 50
+// Upper bound on the number of iterators alive at once during a stack walk.
+#define MAX_ITERATOR_COUNT 64
+
+// From here on in this file, `inline` expands to a forced-inline specifier:
+// `__forceinline` when targeting Windows with a non-GNU compiler, and
+// `static inline __attribute__((always_inline))` otherwise.
+#if defined _WIN32 && !defined __GNUC__
+#define inline __forceinline
+#else
+#define inline static inline __attribute__((always_inline))
+#endif
+
+typedef struct StackNode StackNode;
+
+// Edge from a stack node to one of its predecessors.
+typedef struct {
+  StackNode *node;  // the predecessor node
+  Subtree subtree;  // subtree pushed between the predecessor and the owner (may be null)
+  bool is_pending;  // the `pending` flag supplied when the link was created
+} StackLink;
+
+// Reference-counted node of the parse stack. The position, error cost, node
+// count and dynamic precedence are running totals: `stack_node_new` starts
+// from the predecessor's values and adds the pushed subtree's contribution.
+struct StackNode {
+  TSStateId state;
+  Length position;
+  StackLink links[MAX_LINK_COUNT];  // links to predecessor nodes
+  short unsigned int link_count;    // number of used entries in `links`
+  uint32_t ref_count;
+  unsigned error_cost;
+  unsigned node_count;
+  int dynamic_precedence;
+};
+
+// State of one path being followed by `stack__iter`.
+typedef struct {
+  StackNode *node;         // node the path has currently reached
+  SubtreeArray subtrees;   // subtrees collected along the path (when collection is enabled)
+  uint32_t subtree_count;  // links traversed, not counting links whose subtree is extra
+  bool is_pending;         // starts true; cleared on a counted link that is not pending
+} StackIterator;
+
+// Bundles a caller-supplied callback with its payload so that
+// `ts_stack_iterate` can pass both through `stack__iter`'s single `void *`.
+typedef struct {
+  void *payload;
+  StackIterateCallback callback;
+} StackIterateSession;
+
+// Growable array of node pointers; used for the pool of reusable nodes.
+typedef Array(StackNode *) StackNodeArray;
+
+// Status of a stack head. New heads are created as `StackStatusActive`.
+typedef enum {
+  StackStatusActive,
+  StackStatusPaused,
+  StackStatusHalted,
+} StackStatus;
+
+// One version of the stack: its top node plus per-version bookkeeping.
+typedef struct {
+  StackNode *node;                    // top node of this version
+  Subtree last_external_token;        // retained while stored here (may be null)
+  StackSummary *summary;              // owned; freed when the head is deleted
+  unsigned node_count_at_last_error;  // baseline for `ts_stack_node_count_since_error`
+  TSSymbol lookahead_when_paused;
+  StackStatus status;
+} StackHead;
+
+// The parse stack: a set of heads (versions) sharing nodes, plus scratch
+// storage reused across operations.
+struct Stack {
+  Array(StackHead) heads;          // one entry per stack version
+  StackSliceArray slices;          // result buffer filled by `stack__iter`
+  Array(StackIterator) iterators;  // work list used by `stack__iter`
+  StackNodeArray node_pool;        // released nodes kept for reuse
+  StackNode *base_node;            // node created without a predecessor in `ts_stack_new`
+  SubtreePool *subtree_pool;       // pool passed to subtree release calls
+};
+
+// Result of a `StackCallback`. The values are bit flags: they are combined
+// with `|` and tested with `&`.
+typedef unsigned StackAction;
+enum {
+  StackActionNone,
+  StackActionStop = 1,  // stop following the current path
+  StackActionPop = 2,   // emit a slice for the current path
+};
+
+// Callback invoked by `stack__iter` for each iterator at each step, receiving
+// the caller's payload and the iterator's current state.
+typedef StackAction (*StackCallback)(void *, const StackIterator *);
+
+// Adds a reference to `self`; a null node is ignored. Asserts that the node
+// is still referenced beforehand and that the counter did not wrap to zero.
+static void stack_node_retain(StackNode *self) {
+  if (self) {
+    assert(self->ref_count > 0);
+    self->ref_count += 1;
+    assert(self->ref_count != 0);
+  }
+}
+
+// Drops one reference to `self`. When the count reaches zero, the node's
+// links are released (their subtrees and predecessor nodes), and the node is
+// either returned to `pool` or freed.
+static void stack_node_release(StackNode *self, StackNodeArray *pool, SubtreePool *subtree_pool) {
+recur:
+  assert(self->ref_count != 0);
+  self->ref_count--;
+  if (self->ref_count > 0) return;
+
+  StackNode *first_predecessor = NULL;
+  if (self->link_count > 0) {
+    // Links 1..n-1 are released through a recursive call; link 0's
+    // predecessor is handled by the `goto` below instead of by recursion.
+    for (unsigned i = self->link_count - 1; i > 0; i--) {
+      StackLink link = self->links[i];
+      if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree);
+      stack_node_release(link.node, pool, subtree_pool);
+    }
+    StackLink link = self->links[0];
+    if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree);
+    first_predecessor = self->links[0].node;
+  }
+
+  // Keep the node for reuse while the pool has room; otherwise free it.
+  if (pool->size < MAX_NODE_POOL_SIZE) {
+    array_push(pool, self);
+  } else {
+    ts_free(self);
+  }
+
+  if (first_predecessor) {
+    self = first_predecessor;
+    goto recur;
+  }
+}
+
+// Creates a node in `state` with a reference count of one, taking storage
+// from `pool` when available and allocating otherwise.
+//
+// With a `previous_node`, the new node gets a single link to it carrying
+// `subtree` and `is_pending`, and inherits its position, error cost, node
+// count and dynamic precedence; a non-null `subtree` then adds its own
+// contribution to each. Without a predecessor the node starts at position
+// zero with every total at zero.
+static StackNode *stack_node_new(StackNode *previous_node, Subtree subtree,
+                                 bool is_pending, TSStateId state, StackNodeArray *pool) {
+  StackNode *node;
+  if (pool->size > 0) {
+    node = array_pop(pool);
+  } else {
+    node = ts_malloc(sizeof(StackNode));
+  }
+  *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state};
+
+  if (!previous_node) {
+    node->position = length_zero();
+    node->error_cost = 0;
+    return node;
+  }
+
+  node->link_count = 1;
+  node->links[0] = (StackLink){
+    .node = previous_node,
+    .subtree = subtree,
+    .is_pending = is_pending,
+  };
+
+  node->position = previous_node->position;
+  node->error_cost = previous_node->error_cost;
+  node->dynamic_precedence = previous_node->dynamic_precedence;
+  node->node_count = previous_node->node_count;
+
+  if (subtree.ptr) {
+    node->error_cost += ts_subtree_error_cost(subtree);
+    node->position = length_add(node->position, ts_subtree_total_size(subtree));
+    node->node_count += ts_subtree_node_count(subtree);
+    node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree);
+  }
+
+  return node;
+}
+
+// Decides whether two subtrees may be treated as interchangeable on a link.
+//
+// Identical pointers (including two nulls) are equivalent. Otherwise both
+// must be non-null with the same symbol, and either both carry an error cost
+// or they agree on padding bytes, size bytes, child count, the `extra` flag
+// and external scanner state.
+static bool stack__subtree_is_equivalent(Subtree left, Subtree right) {
+  if (left.ptr == right.ptr) return true;
+  if (!left.ptr || !right.ptr) return false;
+  if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false;
+
+  if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) return true;
+
+  return
+    ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes &&
+    ts_subtree_size(left).bytes == ts_subtree_size(right).bytes &&
+    ts_subtree_child_count(left) == ts_subtree_child_count(right) &&
+    ts_subtree_extra(left) == ts_subtree_extra(right) &&
+    ts_subtree_external_scanner_state_eq(left, right);
+}
+
+// Adds `link` as a further predecessor link of `self`, merging it with an
+// existing link where possible.
+//
+// Nothing happens when the link points back at `self`, or when the node
+// already holds MAX_LINK_COUNT links and no merge applies. When the link is
+// stored, its node and subtree are retained and `self`'s node count and
+// dynamic precedence are raised if the new path's totals are larger.
+static void stack_node_add_link(StackNode *self, StackLink link, SubtreePool *subtree_pool) {
+  if (link.node == self) return;
+
+  for (int i = 0; i < self->link_count; i++) {
+    StackLink *existing_link = &self->links[i];
+    if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) {
+      // In general, we preserve ambiguities until they are removed from the stack
+      // during a pop operation where multiple paths lead to the same node. But in
+      // the special case where two links directly connect the same pair of nodes,
+      // we can safely remove the ambiguity ahead of time without changing behavior.
+      if (existing_link->node == link.node) {
+        // Keep whichever of the two subtrees has the higher dynamic
+        // precedence.
+        if (
+          ts_subtree_dynamic_precedence(link.subtree) >
+          ts_subtree_dynamic_precedence(existing_link->subtree)
+        ) {
+          ts_subtree_retain(link.subtree);
+          ts_subtree_release(subtree_pool, existing_link->subtree);
+          existing_link->subtree = link.subtree;
+          self->dynamic_precedence =
+            link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree);
+        }
+        return;
+      }
+
+      // If the previous nodes are mergeable, merge them recursively.
+      if (existing_link->node->state == link.node->state &&
+          existing_link->node->position.bytes == link.node->position.bytes) {
+        for (int j = 0; j < link.node->link_count; j++) {
+          stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool);
+        }
+        int32_t dynamic_precedence = link.node->dynamic_precedence;
+        if (link.subtree.ptr) {
+          dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
+        }
+        if (dynamic_precedence > self->dynamic_precedence) {
+          self->dynamic_precedence = dynamic_precedence;
+        }
+        return;
+      }
+    }
+  }
+
+  // No merge applied: store the link, unless the node is already full.
+  if (self->link_count == MAX_LINK_COUNT) return;
+
+  stack_node_retain(link.node);
+  unsigned node_count = link.node->node_count;
+  int dynamic_precedence = link.node->dynamic_precedence;
+  self->links[self->link_count++] = link;
+
+  if (link.subtree.ptr) {
+    ts_subtree_retain(link.subtree);
+    node_count += ts_subtree_node_count(link.subtree);
+    dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
+  }
+
+  if (node_count > self->node_count) self->node_count = node_count;
+  if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence;
+}
+
+// Releases everything a head owns: its last external token, its summary and
+// its reference to the top node. A head without a node is left untouched.
+static void stack_head_delete(StackHead *self, StackNodeArray *pool, SubtreePool *subtree_pool) {
+  if (!self->node) return;
+
+  if (self->last_external_token.ptr) {
+    ts_subtree_release(subtree_pool, self->last_external_token);
+  }
+
+  if (self->summary) {
+    array_delete(self->summary);
+    ts_free(self->summary);
+  }
+
+  stack_node_release(self->node, pool, subtree_pool);
+}
+
+static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version,
+                                          StackNode *node) {
+  StackHead head = {
+    .node = node,
+    .node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error,
+    .last_external_token = self->heads.contents[original_version].last_external_token,
+    .status = StackStatusActive,
+    .lookahead_when_paused = 0,
+  };
+  array_push(&self->heads, head);
+  stack_node_retain(node);
+  if (head.last_external_token.ptr) ts_subtree_retain(head.last_external_token);
+  return (StackVersion)(self->heads.size - 1);
+}
+
+// Records `subtrees` as a slice ending at `node`.
+//
+// Existing slices are searched from last to first for one whose version's
+// head already sits on `node`; if found, the new slice reuses that version
+// and is inserted directly after it. Otherwise a new version is created for
+// `node` and the slice is appended.
+static void ts_stack__add_slice(Stack *self, StackVersion original_version,
+                                StackNode *node, SubtreeArray *subtrees) {
+  uint32_t i = self->slices.size;
+  while (i > 0) {
+    i--;
+    StackVersion existing_version = self->slices.contents[i].version;
+    if (self->heads.contents[existing_version].node != node) continue;
+
+    StackSlice slice = {*subtrees, existing_version};
+    array_insert(&self->slices, i + 1, slice);
+    return;
+  }
+
+  StackVersion new_version = ts_stack__add_version(self, original_version, node);
+  StackSlice slice = { *subtrees, new_version };
+  array_push(&self->slices, slice);
+}
+
+// Walks backwards from the head of `version` along every link path, calling
+// `callback` at each node reached, and returns the slices produced.
+//
+// The callback's result controls the walk: `StackActionPop` emits a slice for
+// the current path and `StackActionStop` ends that path. A path also ends
+// when it reaches a node with no links.
+//
+// Subtrees are collected along each path only when `goal_subtree_count` is
+// non-negative; the value is also used to pre-size the first iterator's
+// subtree array.
+//
+// The returned array is the stack's own `slices` buffer, which is cleared at
+// the start of every call.
+inline StackSliceArray stack__iter(Stack *self, StackVersion version,
+                                   StackCallback callback, void *payload,
+                                   int goal_subtree_count) {
+  array_clear(&self->slices);
+  array_clear(&self->iterators);
+
+  StackHead *head = array_get(&self->heads, version);
+  StackIterator iterator = {
+    .node = head->node,
+    .subtrees = array_new(),
+    .subtree_count = 0,
+    .is_pending = true,
+  };
+
+  bool include_subtrees = false;
+  if (goal_subtree_count >= 0) {
+    include_subtrees = true;
+    array_reserve(&iterator.subtrees, goal_subtree_count);
+  }
+
+  array_push(&self->iterators, iterator);
+
+  while (self->iterators.size > 0) {
+    // `size` is captured up front, so iterators forked during this pass are
+    // first visited on the next pass of the outer loop.
+    for (uint32_t i = 0, size = self->iterators.size; i < size; i++) {
+      StackIterator *iterator = &self->iterators.contents[i];
+      StackNode *node = iterator->node;
+
+      StackAction action = callback(payload, iterator);
+      bool should_pop = action & StackActionPop;
+      bool should_stop = action & StackActionStop || node->link_count == 0;
+
+      if (should_pop) {
+        // When the path continues, the slice gets its own copy of the
+        // subtrees; when it stops, the slice takes over the iterator's array.
+        SubtreeArray subtrees = iterator->subtrees;
+        if (!should_stop)
+          ts_subtree_array_copy(subtrees, &subtrees);
+        ts_subtree_array_reverse(&subtrees);
+        ts_stack__add_slice(
+          self,
+          version,
+          node,
+          &subtrees
+        );
+      }
+
+      if (should_stop) {
+        if (!should_pop)
+          ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees);
+        array_erase(&self->iterators, i);
+        // Compensate for the erased element so the next one is not skipped.
+        i--, size--;
+        continue;
+      }
+
+      // Links 1..n-1 each fork a copy of the current iterator. Link 0 is
+      // handled last (when j == link_count) and advances the current iterator
+      // in place, so the forks are copied from its state before it moves.
+      for (uint32_t j = 1; j <= node->link_count; j++) {
+        StackIterator *next_iterator;
+        StackLink link;
+        if (j == node->link_count) {
+          link = node->links[0];
+          next_iterator = &self->iterators.contents[i];
+        } else {
+          // Paths beyond the iterator limit are not followed.
+          if (self->iterators.size >= MAX_ITERATOR_COUNT) continue;
+          link = node->links[j];
+          StackIterator current_iterator = self->iterators.contents[i];
+          array_push(&self->iterators, current_iterator);
+          next_iterator = array_back(&self->iterators);
+          ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees);
+        }
+
+        next_iterator->node = link.node;
+        if (link.subtree.ptr) {
+          if (include_subtrees) {
+            array_push(&next_iterator->subtrees, link.subtree);
+            ts_subtree_retain(link.subtree);
+          }
+
+          // Extra subtrees are collected but not counted.
+          if (!ts_subtree_extra(link.subtree)) {
+            next_iterator->subtree_count++;
+            if (!link.is_pending) {
+              next_iterator->is_pending = false;
+            }
+          }
+        } else {
+          // A link without a subtree still counts as one step.
+          next_iterator->subtree_count++;
+          next_iterator->is_pending = false;
+        }
+      }
+    }
+  }
+
+  return self->slices;
+}
+
+// Allocates a stack that releases subtrees into `subtree_pool`. Its arrays
+// are initialised with a small reserved capacity, a base node is created in
+// state 1 with no predecessor, and the stack is then put through
+// `ts_stack_clear`.
+Stack *ts_stack_new(SubtreePool *subtree_pool) {
+  Stack *stack = ts_calloc(1, sizeof(Stack));
+
+  array_init(&stack->heads);
+  array_reserve(&stack->heads, 4);
+
+  array_init(&stack->slices);
+  array_reserve(&stack->slices, 4);
+
+  array_init(&stack->iterators);
+  array_reserve(&stack->iterators, 4);
+
+  array_init(&stack->node_pool);
+  array_reserve(&stack->node_pool, MAX_NODE_POOL_SIZE);
+
+  stack->subtree_pool = subtree_pool;
+  stack->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &stack->node_pool);
+  ts_stack_clear(stack);
+
+  return stack;
+}
+
+// Destroys the stack and everything it owns.
+void ts_stack_delete(Stack *self) {
+  if (self->slices.contents)
+    array_delete(&self->slices);
+  if (self->iterators.contents)
+    array_delete(&self->iterators);
+  // Nodes are released before the pool is torn down: releasing a node may
+  // place it in the pool, and the pool's contents are freed further below.
+  stack_node_release(self->base_node, &self->node_pool, self->subtree_pool);
+  for (uint32_t i = 0; i < self->heads.size; i++) {
+    stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool);
+  }
+  array_clear(&self->heads);
+  if (self->node_pool.contents) {
+    for (uint32_t i = 0; i < self->node_pool.size; i++)
+      ts_free(self->node_pool.contents[i]);
+    array_delete(&self->node_pool);
+  }
+  array_delete(&self->heads);
+  ts_free(self);
+}
+
+// Number of versions (heads) currently held by the stack.
+uint32_t ts_stack_version_count(const Stack *self) {
+  return self->heads.size;
+}
+
+// State of the top node of the given version.
+TSStateId ts_stack_state(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  return head->node->state;
+}
+
+// Position of the top node of the given version.
+Length ts_stack_position(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  return head->node->position;
+}
+
+// Last external token recorded on the given version (may be null).
+Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  return head->last_external_token;
+}
+
+// Replaces the last external token of the given version. The new token is
+// retained before the old one is released, so passing the token that is
+// already stored is safe.
+void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) {
+  StackHead *head = array_get(&self->heads, version);
+  Subtree previous = head->last_external_token;
+
+  if (token.ptr) {
+    ts_subtree_retain(token);
+  }
+  if (previous.ptr) {
+    ts_subtree_release(self->subtree_pool, previous);
+  }
+  head->last_external_token = token;
+}
+
+unsigned ts_stack_error_cost(const Stack *self, StackVersion version) {
+  StackHead *head = array_get(&self->heads, version);
+  unsigned result = head->node->error_cost;
+  if (
+    head->status == StackStatusPaused ||
+    (head->node->state == ERROR_STATE && !head->node->links[0].subtree.ptr)) {
+    result += ERROR_COST_PER_RECOVERY;
+  }
+  return result;
+}
+
+// Return how far the top node's node count exceeds the count recorded at the
+// last error. If the recorded count is larger than the current one, it is
+// first lowered to the current count, so the result is then zero.
+unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) {
+  StackHead *head = array_get(&self->heads, version);
+  const StackNode *top = head->node;
+
+  if (head->node_count_at_last_error > top->node_count) {
+    head->node_count_at_last_error = top->node_count;
+  }
+
+  return top->node_count - head->node_count_at_last_error;
+}
+
+// Push a new node holding `subtree` and `state` on top of the given version.
+// When no subtree is supplied, the version's node count at the last error is
+// reset to the new node's count.
+void ts_stack_push(Stack *self, StackVersion version, Subtree subtree,
+                   bool pending, TSStateId state) {
+  StackHead *head = array_get(&self->heads, version);
+  StackNode *pushed = stack_node_new(head->node, subtree, pending, state, &self->node_pool);
+  head->node = pushed;
+  if (!subtree.ptr) {
+    head->node_count_at_last_error = pushed->node_count;
+  }
+}
+
+// Iteration callback used by ts_stack_iterate: forwards the current node's
+// state and the iterator's subtree count to the user callback and never pops
+// or stops.
+inline StackAction iterate_callback(void *payload, const StackIterator *iterator) {
+  StackIterateSession *session = payload;
+  TSStateId state = iterator->node->state;
+  uint32_t subtree_count = iterator->subtree_count;
+  session->callback(session->payload, state, subtree_count);
+  return StackActionNone;
+}
+
+// Walk the given version of the stack, reporting each visited state and
+// subtree count to `callback` together with the caller's `payload`.
+void ts_stack_iterate(Stack *self, StackVersion version,
+                      StackIterateCallback callback, void *payload) {
+  StackIterateSession iterate_session = {payload, callback};
+  // A goal subtree count of -1 is passed, as in ts_stack_record_summary.
+  stack__iter(self, version, iterate_callback, &iterate_session, -1);
+}
+
+// Iteration callback for ts_stack_pop_count: pop and stop once the iterator
+// has collected exactly the requested number of subtrees.
+inline StackAction pop_count_callback(void *payload, const StackIterator *iterator) {
+  const unsigned *goal_subtree_count = payload;
+  bool reached_goal = iterator->subtree_count == *goal_subtree_count;
+  return reached_goal ? (StackActionPop | StackActionStop) : StackActionNone;
+}
+
+// Pop `count` subtrees from the given version and return the resulting slices.
+StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) {
+  StackSliceArray slices = stack__iter(self, version, pop_count_callback, &count, count);
+  return slices;
+}
+
+// Iteration callback for ts_stack_pop_pending: keep going until one subtree
+// has been collected, then stop, popping only if the iterator is pending.
+inline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) {
+  if (iterator->subtree_count < 1) return StackActionNone;
+  if (!iterator->is_pending) return StackActionStop;
+  return StackActionPop | StackActionStop;
+}
+
+// Pop a pending subtree from the top of the given version. When something was
+// popped, the first resulting version is renumbered onto `version` and the
+// returned slice is updated to match.
+StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) {
+  StackSliceArray popped = stack__iter(self, version, pop_pending_callback, NULL, 0);
+  if (popped.size == 0) return popped;
+
+  StackSlice *first_slice = &popped.contents[0];
+  ts_stack_renumber_version(self, first_slice->version, version);
+  first_slice->version = version;
+  return popped;
+}
+
+// Iteration callback for ts_stack_pop_error. `payload` points to a bool that
+// records whether an error has already been popped. Once the iterator holds a
+// subtree it always stops, and it also pops when that first subtree is an
+// error and no error was found before.
+inline StackAction pop_error_callback(void *payload, const StackIterator *iterator) {
+  if (iterator->subtrees.size == 0) return StackActionNone;
+
+  bool *found_error = payload;
+  if (*found_error || !ts_subtree_is_error(iterator->subtrees.contents[0])) {
+    return StackActionStop;
+  }
+
+  *found_error = true;
+  return StackActionPop | StackActionStop;
+}
+
+SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) {
+  StackNode *node = array_get(&self->heads, version)->node;
+  for (unsigned i = 0; i < node->link_count; i++) {
+    if (node->links[i].subtree.ptr && ts_subtree_is_error(node->links[i].subtree)) {
+      bool found_error = false;
+      StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1);
+      if (pop.size > 0) {
+        assert(pop.size == 1);
+        ts_stack_renumber_version(self, pop.contents[0].version, version);
+        return pop.contents[0].subtrees;
+      }
+      break;
+    }
+  }
+  return (SubtreeArray){.size = 0};
+}
+
+// Iteration callback for ts_stack_pop_all: pop only when the iterator reaches
+// a node that has no links.
+inline StackAction pop_all_callback(void *payload, const StackIterator *iterator) {
+  if (iterator->node->link_count == 0) {
+    return StackActionPop;
+  }
+  return StackActionNone;
+}
+
+// Pop every subtree from the given version and return the resulting slices.
+StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) {
+  StackSliceArray slices = stack__iter(self, version, pop_all_callback, NULL, 0);
+  return slices;
+}
+
+// Payload handed to summarize_stack_callback while recording a summary.
+typedef struct {
+  // Summary being filled in; entries are appended by the callback.
+  StackSummary *summary;
+  // Iteration stops once the iterator's subtree count exceeds this depth.
+  unsigned max_depth;
+} SummarizeStackSession;
+
+// Iteration callback for ts_stack_record_summary: append a (position, depth,
+// state) entry for the current node unless an entry with the same depth and
+// state is already present. Stops once the depth exceeds the session maximum.
+inline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) {
+  SummarizeStackSession *session = payload;
+  TSStateId state = iterator->node->state;
+  unsigned depth = iterator->subtree_count;
+  if (depth > session->max_depth) return StackActionStop;
+
+  // Scan existing entries from newest to oldest; the scan ends at the first
+  // entry whose depth is smaller than the current one.
+  for (unsigned remaining = session->summary->size; remaining > 0; remaining--) {
+    StackSummaryEntry existing = session->summary->contents[remaining - 1];
+    if (existing.depth < depth) break;
+    if (existing.depth == depth && existing.state == state) return StackActionNone;
+  }
+
+  array_push(session->summary, ((StackSummaryEntry){
+    .position = iterator->node->position,
+    .depth = depth,
+    .state = state,
+  }));
+  return StackActionNone;
+}
+
+// Compute a summary of the parse states within `max_depth` of the top of the
+// given version and store it on that version's head, replacing (and freeing)
+// any summary recorded there earlier.
+void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) {
+  SummarizeStackSession session = {
+    .summary = ts_malloc(sizeof(StackSummary)),
+    .max_depth = max_depth
+  };
+  array_init(session.summary);
+  stack__iter(self, version, summarize_stack_callback, &session, -1);
+
+  // Overwriting an existing summary pointer would leak both the array
+  // contents and the StackSummary allocation, so free the old one first.
+  StackHead *head = &self->heads.contents[version];
+  if (head->summary) {
+    array_delete(head->summary);
+    ts_free(head->summary);
+  }
+  head->summary = session.summary;
+}
+
+// Return the summary stored on the given version's head (NULL if none is set).
+StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) {
+  StackHead *head = array_get(&self->heads, version);
+  return head->summary;
+}
+
+// Return the dynamic precedence stored on the top node of the given version.
+int ts_stack_dynamic_precedence(Stack *self, StackVersion version) {
+  StackHead *head = array_get(&self->heads, version);
+  return head->node->dynamic_precedence;
+}
+
+// Report whether the given version has consumed any bytes since its last
+// error. A version with no error cost counts as advanced. Otherwise the first
+// link of each node is followed downwards: a subtree with a non-zero total
+// byte count means progress, while empty, error-free subtrees pushed after the
+// last error are skipped over.
+bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  const StackNode *node = head->node;
+  if (node->error_cost == 0) return true;
+
+  while (node) {
+    if (node->link_count == 0) break;
+
+    Subtree subtree = node->links[0].subtree;
+    if (!subtree.ptr) break;
+    if (ts_subtree_total_bytes(subtree) > 0) return true;
+
+    // Only keep descending through nodes created after the last error whose
+    // subtree carries no error cost.
+    if (node->node_count <= head->node_count_at_last_error) break;
+    if (ts_subtree_error_cost(subtree) != 0) break;
+
+    node = node->links[0].node;
+  }
+
+  return false;
+}
+
+// Delete the head of the given version and erase its slot from the head list.
+void ts_stack_remove_version(Stack *self, StackVersion version) {
+  StackHead *doomed_head = array_get(&self->heads, version);
+  stack_head_delete(doomed_head, &self->node_pool, self->subtree_pool);
+  array_erase(&self->heads, version);
+}
+
+// Move version `v1` into slot `v2` (which must be the lower index), deleting
+// the head previously stored at `v2` and erasing slot `v1`. Does nothing when
+// both indices are equal.
+void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) {
+  if (v1 == v2) return;
+  assert(v2 < v1);
+  assert((uint32_t)v1 < self->heads.size);
+  StackHead *source_head = &self->heads.contents[v1];
+  StackHead *target_head = &self->heads.contents[v2];
+  // Hand the target's summary over to the source when the source has none, so
+  // that it is not passed to stack_head_delete along with the target head.
+  if (target_head->summary && !source_head->summary) {
+    source_head->summary = target_head->summary;
+    target_head->summary = NULL;
+  }
+  stack_head_delete(target_head, &self->node_pool, self->subtree_pool);
+  *target_head = *source_head;
+  array_erase(&self->heads, v1);
+}
+
+// Exchange the heads stored at versions `v1` and `v2`.
+void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) {
+  StackHead *first = &self->heads.contents[v1];
+  StackHead *second = &self->heads.contents[v2];
+  StackHead saved_first = *first;
+  *first = *second;
+  *second = saved_first;
+}
+
+// Append a copy of the given version's head and return the new version's
+// index. The copy retains the shared node and last external token, and starts
+// without a summary.
+StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
+  assert(version < self->heads.size);
+  array_push(&self->heads, self->heads.contents[version]);
+
+  StackHead *copied_head = array_back(&self->heads);
+  copied_head->summary = NULL;
+  stack_node_retain(copied_head->node);
+  if (copied_head->last_external_token.ptr) {
+    ts_subtree_retain(copied_head->last_external_token);
+  }
+
+  return self->heads.size - 1;
+}
+
+// Merge `version2` into `version1` when ts_stack_can_merge allows it: every
+// link of version2's top node is added to version1's top node, and version2
+// is then removed. Returns whether the merge happened.
+bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) {
+  if (!ts_stack_can_merge(self, version1, version2)) {
+    return false;
+  }
+
+  StackHead *kept = &self->heads.contents[version1];
+  StackHead *absorbed = &self->heads.contents[version2];
+
+  uint32_t link_index = 0;
+  while (link_index < absorbed->node->link_count) {
+    stack_node_add_link(kept->node, absorbed->node->links[link_index], self->subtree_pool);
+    link_index++;
+  }
+
+  if (kept->node->state == ERROR_STATE) {
+    kept->node_count_at_last_error = kept->node->node_count;
+  }
+
+  ts_stack_remove_version(self, version2);
+  return true;
+}
+
+// Two versions can be merged when both are active and their top nodes agree
+// on parse state, byte position and error cost, and their last external
+// tokens have equal scanner state.
+bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) {
+  StackHead *head1 = &self->heads.contents[version1];
+  StackHead *head2 = &self->heads.contents[version2];
+
+  if (head1->status != StackStatusActive) return false;
+  if (head2->status != StackStatusActive) return false;
+  if (head1->node->state != head2->node->state) return false;
+  if (head1->node->position.bytes != head2->node->position.bytes) return false;
+  if (head1->node->error_cost != head2->node->error_cost) return false;
+
+  return ts_subtree_external_scanner_state_eq(
+    head1->last_external_token,
+    head2->last_external_token
+  );
+}
+
+// Mark the given version as halted.
+void ts_stack_halt(Stack *self, StackVersion version) {
+  StackHead *head = array_get(&self->heads, version);
+  head->status = StackStatusHalted;
+}
+
+// Pause the given version, remembering the lookahead symbol it was paused on
+// and resetting its node count at the last error to the current node count.
+void ts_stack_pause(Stack *self, StackVersion version, TSSymbol lookahead) {
+  StackHead *paused_head = array_get(&self->heads, version);
+  paused_head->lookahead_when_paused = lookahead;
+  paused_head->node_count_at_last_error = paused_head->node->node_count;
+  paused_head->status = StackStatusPaused;
+}
+
+// True when the given version's status is active.
+bool ts_stack_is_active(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  return head->status == StackStatusActive;
+}
+
+// True when the given version's status is halted.
+bool ts_stack_is_halted(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  return head->status == StackStatusHalted;
+}
+
+// True when the given version's status is paused.
+bool ts_stack_is_paused(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  return head->status == StackStatusPaused;
+}
+
+// Reactivate a paused version and return the lookahead symbol that was
+// recorded when it was paused; the stored lookahead is reset to 0.
+TSSymbol ts_stack_resume(Stack *self, StackVersion version) {
+  StackHead *head = array_get(&self->heads, version);
+  assert(head->status == StackStatusPaused);
+
+  TSSymbol paused_lookahead = head->lookahead_when_paused;
+  head->lookahead_when_paused = 0;
+  head->status = StackStatusActive;
+  return paused_lookahead;
+}
+
+// Reset the stack to a single active version that points at the base node.
+// All existing heads are deleted first.
+void ts_stack_clear(Stack *self) {
+  // The new head holds its own reference to the base node.
+  stack_node_retain(self->base_node);
+
+  uint32_t head_index = 0;
+  while (head_index < self->heads.size) {
+    stack_head_delete(&self->heads.contents[head_index], &self->node_pool, self->subtree_pool);
+    head_index++;
+  }
+  array_clear(&self->heads);
+
+  StackHead initial_head = {
+    .node = self->base_node,
+    .last_external_token = NULL_SUBTREE,
+    .status = StackStatusActive,
+    .lookahead_when_paused = 0,
+  };
+  array_push(&self->heads, initial_head);
+}
+
+// Write a Graphviz DOT description of the stack to `f` (stderr when `f` is
+// NULL). Halted versions are skipped. Allocation recording is switched off
+// while the graph is printed and restored afterwards. Always returns true.
+bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) {
+  array_reserve(&self->iterators, 32);
+  bool was_recording_allocations = ts_toggle_allocation_recording(false);
+  if (!f) f = stderr;
+
+  fprintf(f, "digraph stack {\n");
+  fprintf(f, "rankdir=\"RL\";\n");
+  fprintf(f, "edge [arrowhead=none]\n");
+
+  // Nodes that have already been printed, so shared nodes appear only once.
+  Array(StackNode *) visited_nodes = array_new();
+
+  // Emit one invisible "head" node per live version and seed an iterator at
+  // the top node of each version.
+  array_clear(&self->iterators);
+  for (uint32_t i = 0; i < self->heads.size; i++) {
+    StackHead *head = &self->heads.contents[i];
+    if (head->status == StackStatusHalted) continue;
+
+    fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i);
+    // %p requires a void pointer argument.
+    fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node);
+
+    if (head->status == StackStatusPaused) {
+      fprintf(f, "color=red ");
+    }
+    fprintf(f,
+      "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u",
+      i,
+      ts_stack_node_count_since_error(self, i),
+      ts_stack_error_cost(self, i)
+    );
+
+    if (head->last_external_token.ptr) {
+      const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state;
+      const char *data = ts_external_scanner_state_data(state);
+      fprintf(f, "\nexternal_scanner_state:");
+      // Go through unsigned char so that bytes >= 0x80 are not sign-extended
+      // into eight hex digits where plain char is signed.
+      for (uint32_t j = 0; j < state->length; j++) {
+        fprintf(f, " %2X", (unsigned)(unsigned char)data[j]);
+      }
+    }
+
+    fprintf(f, "\"]\n");
+    array_push(&self->iterators, ((StackIterator){.node = head->node }));
+  }
+
+  // Advance every iterator one node per pass until all of them have reached
+  // a node that was already printed (or run off the bottom of the stack).
+  bool all_iterators_done = false;
+  while (!all_iterators_done) {
+    all_iterators_done = true;
+
+    for (uint32_t i = 0; i < self->iterators.size; i++) {
+      StackIterator iterator = self->iterators.contents[i];
+      StackNode *node = iterator.node;
+
+      for (uint32_t j = 0; j < visited_nodes.size; j++) {
+        if (visited_nodes.contents[j] == node) {
+          node = NULL;
+          break;
+        }
+      }
+
+      if (!node) continue;
+      all_iterators_done = false;
+
+      fprintf(f, "node_%p [", (void *)node);
+      if (node->state == ERROR_STATE) {
+        fprintf(f, "label=\"?\"");
+      } else if (
+        node->link_count == 1 &&
+        node->links[0].subtree.ptr &&
+        ts_subtree_extra(node->links[0].subtree)
+      ) {
+        fprintf(f, "shape=point margin=0 label=\"\"");
+      } else {
+        fprintf(f, "label=\"%d\"", node->state);
+      }
+
+      fprintf(
+        f,
+        " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n",
+        node->position.extent.row + 1,
+        node->position.extent.column,
+        node->node_count,
+        node->error_cost,
+        node->dynamic_precedence
+      );
+
+      for (uint32_t j = 0; j < node->link_count; j++) {
+        StackLink link = node->links[j];
+        fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node);
+        if (link.is_pending) fprintf(f, "style=dashed ");
+        if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray ");
+
+        if (!link.subtree.ptr) {
+          fprintf(f, "color=red");
+        } else {
+          fprintf(f, "label=\"");
+          bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree);
+          if (quoted) fprintf(f, "'");
+          const char *name = ts_language_symbol_name(language, ts_subtree_symbol(link.subtree));
+          // Escape quotes and backslashes so the symbol name stays inside the
+          // DOT string literal.
+          for (const char *c = name; *c; c++) {
+            if (*c == '\"' || *c == '\\') fprintf(f, "\\");
+            fprintf(f, "%c", *c);
+          }
+          if (quoted) fprintf(f, "'");
+          fprintf(f, "\"");
+          // Dynamic precedence is signed, so print it with %d as the node
+          // tooltip above does.
+          fprintf(
+            f,
+            "labeltooltip=\"error_cost: %u\ndynamic_precedence: %d\"",
+            ts_subtree_error_cost(link.subtree),
+            ts_subtree_dynamic_precedence(link.subtree)
+          );
+        }
+
+        fprintf(f, "];\n");
+
+        // The first link reuses this iterator's slot; every further link gets
+        // a new iterator appended to the list.
+        StackIterator *next_iterator;
+        if (j == 0) {
+          next_iterator = &self->iterators.contents[i];
+        } else {
+          array_push(&self->iterators, iterator);
+          next_iterator = array_back(&self->iterators);
+        }
+        next_iterator->node = link.node;
+      }
+
+      array_push(&visited_nodes, node);
+    }
+  }
+
+  fprintf(f, "}\n");
+
+  array_delete(&visited_nodes);
+  ts_toggle_allocation_recording(was_recording_allocations);
+  return true;
+}
+
+#undef inline
--- a/shlr/tree-sitter/lib/src/stack.h
+++ b/shlr/tree-sitter/lib/src/stack.h
@ -0,0 +1,135 @@
+#ifndef TREE_SITTER_PARSE_STACK_H_
+#define TREE_SITTER_PARSE_STACK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./array.h"
+#include "./subtree.h"
+#include "./error_costs.h"
+#include <stdio.h>
+
+typedef struct Stack Stack;
+
+typedef unsigned StackVersion;
+#define STACK_VERSION_NONE ((StackVersion)-1)
+
+// One result of a pop operation: the subtrees that were removed and the
+// stack version they were removed from.
+typedef struct {
+  SubtreeArray subtrees;
+  StackVersion version;
+} StackSlice;
+typedef Array(StackSlice) StackSliceArray;
+
+// One entry of a stack summary: a parse state found at a given depth below
+// the top of a stack version, together with the position of that node.
+typedef struct {
+  Length position;
+  unsigned depth;
+  TSStateId state;
+} StackSummaryEntry;
+typedef Array(StackSummaryEntry) StackSummary;
+
+// Create a stack.
+Stack *ts_stack_new(SubtreePool *);
+
+// Release the memory reserved for a given stack.
+void ts_stack_delete(Stack *);
+
+// Get the stack's current number of versions.
+uint32_t ts_stack_version_count(const Stack *);
+
+// Get the state at the top of the given version of the stack. If the stack is
+// empty, this returns the initial state, 0.
+TSStateId ts_stack_state(const Stack *, StackVersion);
+
+// Get the last external token associated with a given version of the stack.
+Subtree ts_stack_last_external_token(const Stack *, StackVersion);
+
+// Set the last external token associated with a given version of the stack.
+void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree );
+
+// Get the position of the given version of the stack within the document.
+Length ts_stack_position(const Stack *, StackVersion);
+
+// Push a tree and state onto the given version of the stack.
+//
+// This transfers ownership of the tree to the Stack. Callers that
+// need to retain ownership of the tree for their own purposes should
+// first retain the tree.
+void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId);
+
+// Pop the given number of entries from the given version of the stack. This
+// operation can increase the number of stack versions by revealing multiple
+// versions which had previously been merged. It returns an array that
+// specifies the index of each revealed version and the trees that were
+// removed from that version.
+StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count);
+
+// Remove an error at the top of the given version of the stack.
+SubtreeArray ts_stack_pop_error(Stack *, StackVersion);
+
+// Remove any pending trees from the top of the given version of the stack.
+StackSliceArray ts_stack_pop_pending(Stack *, StackVersion);
+
+// Remove all trees from the given version of the stack.
+StackSliceArray ts_stack_pop_all(Stack *, StackVersion);
+
+// Get the maximum number of tree nodes reachable from this version of the stack
+// since the last error was detected.
+unsigned ts_stack_node_count_since_error(const Stack *, StackVersion);
+
+// Get the dynamic precedence stored on the top node of the given version of
+// the stack.
+int ts_stack_dynamic_precedence(Stack *, StackVersion);
+
+// Determine whether the given version of the stack has consumed any bytes
+// since its last error. A version with no error cost always counts as
+// advanced.
+bool ts_stack_has_advanced_since_error(const Stack *, StackVersion);
+
+// Compute a summary of all the parse states near the top of the given
+// version of the stack and store the summary for later retrieval.
+void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth);
+
+// Retrieve a summary of all the parse states near the top of the
+// given version of the stack.
+StackSummary *ts_stack_get_summary(Stack *, StackVersion);
+
+// Get the total cost of all errors on the given version of the stack.
+unsigned ts_stack_error_cost(const Stack *, StackVersion version);
+
+// Merge the given two stack versions if possible, returning true
+// if they were successfully merged and false otherwise.
+bool ts_stack_merge(Stack *, StackVersion, StackVersion);
+
+// Determine whether the given two stack versions can be merged.
+bool ts_stack_can_merge(Stack *, StackVersion, StackVersion);
+
+// Make a paused version active again, returning the lookahead symbol that was
+// recorded when the version was paused.
+TSSymbol ts_stack_resume(Stack *, StackVersion);
+
+// Mark the given version as paused and remember the given lookahead symbol.
+void ts_stack_pause(Stack *, StackVersion, TSSymbol);
+
+// Mark the given version as halted.
+void ts_stack_halt(Stack *, StackVersion);
+
+// Determine whether the given version is active.
+bool ts_stack_is_active(const Stack *, StackVersion);
+
+// Determine whether the given version is paused.
+bool ts_stack_is_paused(const Stack *, StackVersion);
+
+// Determine whether the given version is halted.
+bool ts_stack_is_halted(const Stack *, StackVersion);
+
+// Move the first given version into the slot of the second, which must be the
+// lower index. The version previously stored in that slot is deleted.
+void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);
+
+// Exchange the two given versions.
+void ts_stack_swap_versions(Stack *, StackVersion, StackVersion);
+
+// Append a copy of the given version and return the index of the copy.
+StackVersion ts_stack_copy_version(Stack *, StackVersion);
+
+// Remove the given version from the stack.
+void ts_stack_remove_version(Stack *, StackVersion);
+
+// Remove all versions and leave a single active version that points at the
+// stack's base node.
+void ts_stack_clear(Stack *);
+
+// Write a Graphviz DOT description of the stack to the given file, or to
+// stderr when the file is NULL. Always returns true.
+bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *);
+
+// Callback for ts_stack_iterate. It receives the caller's payload, the parse
+// state of the node being visited, and the number of subtrees between the top
+// of the stack and that node.
+typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t);
+
+// Walk the given version of the stack, invoking the callback with the given
+// payload as nodes are visited.
+void ts_stack_iterate(Stack *, StackVersion, StackIterateCallback, void *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_PARSE_STACK_H_
--- a/shlr/tree-sitter/lib/src/subtree.c
+++ b/shlr/tree-sitter/lib/src/subtree.c
@ -0,0 +1,980 @@
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include "./alloc.h"
+#include "./atomic.h"
+#include "./subtree.h"
+#include "./length.h"
+#include "./language.h"
+#include "./error_costs.h"
+#include <stddef.h>
+
+// Three Length values describing an edit: where it starts, and where the
+// edited region ended before and after the change.
+// NOTE(review): inferred from the field names; the code that uses Edit is
+// outside this chunk — confirm.
+typedef struct {
+  Length start;
+  Length old_end;
+  Length new_end;
+} Edit;
+
+// Exclusive upper bound that ts_subtree_can_inline applies to the padding and
+// size values of a subtree stored inline.
+#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX
+// Maximum number of freed subtrees that ts_subtree_pool_free keeps for reuse.
+#define TS_MAX_TREE_POOL_SIZE 32
+
+// A zero-length external scanner state.
+static const ExternalScannerState empty_state = {.length = 0, .short_data = {0}};
+
+// ExternalScannerState
+
+// Initialize a scanner state with a copy of `length` bytes from `data`. Data
+// that does not fit in the inline buffer is stored in a heap allocation.
+void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) {
+  self->length = length;
+
+  char *destination;
+  if (length > sizeof(self->short_data)) {
+    self->long_data = ts_malloc(length);
+    destination = self->long_data;
+  } else {
+    destination = self->short_data;
+  }
+
+  memcpy(destination, data, length);
+}
+
+// Return a copy of a scanner state. Heap-stored data is duplicated so the
+// copy owns its own allocation.
+ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) {
+  ExternalScannerState duplicate = *self;
+
+  bool stored_on_heap = self->length > sizeof(self->short_data);
+  if (!stored_on_heap) return duplicate;
+
+  duplicate.long_data = ts_malloc(self->length);
+  memcpy(duplicate.long_data, self->long_data, self->length);
+  return duplicate;
+}
+
+// Free the heap allocation of a scanner state, if it has one.
+void ts_external_scanner_state_delete(ExternalScannerState *self) {
+  bool stored_on_heap = self->length > sizeof(self->short_data);
+  if (stored_on_heap) ts_free(self->long_data);
+}
+
+// Return a pointer to the state's bytes, whichever buffer holds them.
+const char *ts_external_scanner_state_data(const ExternalScannerState *self) {
+  if (self->length <= sizeof(self->short_data)) {
+    return self->short_data;
+  }
+  return self->long_data;
+}
+
+// Two scanner states are equal when they are the same object, or when they
+// have the same length and identical bytes.
+bool ts_external_scanner_state_eq(const ExternalScannerState *a, const ExternalScannerState *b) {
+  if (a == b) return true;
+  if (a->length != b->length) return false;
+
+  const char *a_bytes = ts_external_scanner_state_data(a);
+  const char *b_bytes = ts_external_scanner_state_data(b);
+  return memcmp(a_bytes, b_bytes, a->length) == 0;
+}
+
+// SubtreeArray
+
+// Copy `self` into `dest`. When the source has capacity, `dest` receives its
+// own buffer of the same capacity and every copied subtree is retained.
+void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) {
+  dest->contents = self.contents;
+  dest->size = self.size;
+  dest->capacity = self.capacity;
+  if (self.capacity == 0) return;
+
+  dest->contents = ts_calloc(self.capacity, sizeof(Subtree));
+  memcpy(dest->contents, self.contents, self.size * sizeof(Subtree));
+
+  uint32_t index = 0;
+  while (index < self.size) {
+    ts_subtree_retain(dest->contents[index]);
+    index++;
+  }
+}
+
+// Release every subtree in the array, then free the array's storage.
+void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
+  uint32_t index = 0;
+  while (index < self->size) {
+    ts_subtree_release(pool, self->contents[index]);
+    index++;
+  }
+  array_delete(self);
+}
+
+// Strip the run of `extra` subtrees from the end of the array and return them
+// as a new array in their original order.
+SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *self) {
+  SubtreeArray trailing_extras = array_new();
+
+  // Walk backwards from the end until a non-extra subtree is found.
+  uint32_t kept_count = self->size;
+  while (kept_count > 0) {
+    Subtree last = self->contents[kept_count - 1];
+    if (!ts_subtree_extra(last)) break;
+    array_push(&trailing_extras, last);
+    kept_count--;
+  }
+
+  self->size = kept_count;
+  // The extras were collected back to front, so restore their order.
+  ts_subtree_array_reverse(&trailing_extras);
+  return trailing_extras;
+}
+
+// Reverse the order of the array's elements in place.
+void ts_subtree_array_reverse(SubtreeArray *self) {
+  uint32_t front = 0;
+  uint32_t back = self->size;
+
+  // `back` is one past the element to swap; stop when the ends meet.
+  while (front + 1 < back) {
+    back--;
+    Subtree held = self->contents[front];
+    self->contents[front] = self->contents[back];
+    self->contents[back] = held;
+    front++;
+  }
+}
+
+// SubtreePool
+
+// Create a subtree pool whose free list has room for `capacity` entries.
+SubtreePool ts_subtree_pool_new(uint32_t capacity) {
+  SubtreePool pool = {array_new(), array_new()};
+  array_reserve(&pool.free_trees, capacity);
+  return pool;
+}
+
+// Free every pooled subtree allocation and the pool's own arrays.
+void ts_subtree_pool_delete(SubtreePool *self) {
+  if (self->free_trees.contents) {
+    unsigned index = 0;
+    while (index < self->free_trees.size) {
+      ts_free(self->free_trees.contents[index].ptr);
+      index++;
+    }
+    array_delete(&self->free_trees);
+  }
+
+  if (self->tree_stack.contents) {
+    array_delete(&self->tree_stack);
+  }
+}
+
+// Hand out storage for one subtree, reusing a pooled allocation if any is
+// available and falling back to a fresh allocation otherwise.
+static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) {
+  if (self->free_trees.size == 0) {
+    return ts_malloc(sizeof(SubtreeHeapData));
+  }
+  return array_pop(&self->free_trees).ptr;
+}
+
+// Return a subtree allocation to the pool, or free it outright when the pool
+// has no capacity or already holds TS_MAX_TREE_POOL_SIZE entries.
+static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) {
+  bool pool_enabled = self->free_trees.capacity > 0;
+  bool has_room = self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE;
+
+  if (!pool_enabled || !has_room) {
+    ts_free(tree);
+    return;
+  }
+
+  array_push(&self->free_trees, (MutableSubtree) {.ptr = tree});
+}
+
+// Subtree
+
+// Decide whether a leaf with the given padding, size and lookahead is small
+// enough to be represented inline: each value must stay below its limit, and
+// the size must not span a row boundary.
+static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) {
+  if (padding.bytes >= TS_MAX_INLINE_TREE_LENGTH) return false;
+  if (padding.extent.row >= 16) return false;
+  if (padding.extent.column >= TS_MAX_INLINE_TREE_LENGTH) return false;
+  if (size.extent.row != 0) return false;
+  if (size.extent.column >= TS_MAX_INLINE_TREE_LENGTH) return false;
+  return lookahead_bytes < 16;
+}
+
+// Create a leaf subtree for `symbol`. The leaf is stored inline when the
+// symbol fits in a byte, it has no external tokens, and its padding, size and
+// lookahead pass ts_subtree_can_inline; otherwise it is taken from the pool
+// with a reference count of 1. Only the end-of-input symbol is marked `extra`.
+Subtree ts_subtree_new_leaf(
+  SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
+  uint32_t lookahead_bytes, TSStateId parse_state, bool has_external_tokens,
+  bool is_keyword, const TSLanguage *language
+) {
+  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
+  bool extra = symbol == ts_builtin_sym_end;
+
+  bool fits_inline =
+    symbol <= UINT8_MAX &&
+    !has_external_tokens &&
+    ts_subtree_can_inline(padding, size, lookahead_bytes);
+
+  if (!fits_inline) {
+    SubtreeHeapData *heap_leaf = ts_subtree_pool_allocate(pool);
+    *heap_leaf = (SubtreeHeapData) {
+      .ref_count = 1,
+      .padding = padding,
+      .size = size,
+      .lookahead_bytes = lookahead_bytes,
+      .error_cost = 0,
+      .child_count = 0,
+      .symbol = symbol,
+      .parse_state = parse_state,
+      .visible = metadata.visible,
+      .named = metadata.named,
+      .extra = extra,
+      .fragile_left = false,
+      .fragile_right = false,
+      .has_changes = false,
+      .has_external_tokens = has_external_tokens,
+      .is_missing = false,
+      .is_keyword = is_keyword,
+      .first_leaf = {.symbol = 0, .parse_state = 0},
+    };
+    return (Subtree) {.ptr = heap_leaf};
+  }
+
+  return (Subtree) {{
+    .parse_state = parse_state,
+    .symbol = symbol,
+    .padding_bytes = padding.bytes,
+    .padding_rows = padding.extent.row,
+    .padding_columns = padding.extent.column,
+    .size_bytes = size.bytes,
+    .lookahead_bytes = lookahead_bytes,
+    .visible = metadata.visible,
+    .named = metadata.named,
+    .extra = extra,
+    .has_changes = false,
+    .is_missing = false,
+    .is_keyword = is_keyword,
+    .is_inline = true,
+  }};
+}
+
+// Change the symbol of a subtree and refresh its `named` and `visible` flags
+// from the language's metadata for the new symbol.
+void ts_subtree_set_symbol(
+  MutableSubtree *self,
+  TSSymbol symbol,
+  const TSLanguage *language
+) {
+  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
+  if (self->data.is_inline) {
+    // ts_subtree_new_leaf stores symbols up to and including UINT8_MAX
+    // inline, so the same inclusive bound applies here.
+    assert(symbol <= UINT8_MAX);
+    self->data.symbol = symbol;
+    self->data.named = metadata.named;
+    self->data.visible = metadata.visible;
+  } else {
+    self->ptr->symbol = symbol;
+    self->ptr->named = metadata.named;
+    self->ptr->visible = metadata.visible;
+  }
+}
+
+Subtree ts_subtree_new_error(
+  SubtreePool *pool, int32_t lookahead_char, Length padding, Length size,
+  uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language
+) {
+  Subtree result = ts_subtree_new_leaf(
+    pool, ts_builtin_sym_error, padding, size, bytes_scanned,
+    parse_state, false, false, language
+  );
+  SubtreeHeapData *data = (SubtreeHeapData *)result.ptr;
+  data->fragile_left = true;
+  data->fragile_right = true;
+  data->lookahead_char = lookahead_char;
+  return result;
+}
+
+// Return a version of `self` that may be modified. Inline subtrees and
+// subtrees with a single reference are returned as they are; otherwise the
+// heap data is copied and the caller's reference to the original is released.
+MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) {
+  if (self.data.is_inline) return (MutableSubtree) {self.data};
+  if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self);
+
+  SubtreeHeapData *result = ts_subtree_pool_allocate(pool);
+  memcpy(result, self.ptr, sizeof(SubtreeHeapData));
+  if (result->child_count > 0) {
+    // The copy gets its own children array; each child gains a reference.
+    result->children = ts_calloc(self.ptr->child_count, sizeof(Subtree));
+    memcpy(result->children, self.ptr->children, result->child_count * sizeof(Subtree));
+    for (uint32_t i = 0; i < result->child_count; i++) {
+      ts_subtree_retain(result->children[i]);
+    }
+  } else if (result->has_external_tokens) {
+    // A leaf with external tokens needs its own copy of the scanner state.
+    result->external_scanner_state = ts_external_scanner_state_copy(&self.ptr->external_scanner_state);
+  }
+  result->ref_count = 1;
+  ts_subtree_release(pool, self);
+  return (MutableSubtree) {.ptr = result};
+}
+
+// Restructure up to `count` levels of the chain formed by `self` and its
+// first children. `stack` is used as scratch space and is restored to its
+// initial size before returning.
+static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLanguage *language,
+                                 MutableSubtreeArray *stack) {
+  unsigned initial_stack_size = stack->size;
+
+  MutableSubtree tree = self;
+  TSSymbol symbol = tree.ptr->symbol;
+  for (unsigned i = 0; i < count; i++) {
+    // Stop as soon as the tree, its first child, or that child's first child
+    // is shared, has fewer than two children, or (for the descendants) is
+    // inline or has a different symbol.
+    if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break;
+
+    MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]);
+    if (
+      child.data.is_inline ||
+      child.ptr->child_count < 2 ||
+      child.ptr->ref_count > 1 ||
+      child.ptr->symbol != symbol
+    ) break;
+
+    MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[0]);
+    if (
+      grandchild.data.is_inline ||
+      grandchild.ptr->child_count < 2 ||
+      grandchild.ptr->ref_count > 1 ||
+      grandchild.ptr->symbol != symbol
+    ) break;
+
+    // Rotate: the grandchild becomes the tree's first child, the child takes
+    // over the grandchild's last child as its first child, and the child
+    // becomes the grandchild's last child.
+    tree.ptr->children[0] = ts_subtree_from_mut(grandchild);
+    child.ptr->children[0] = grandchild.ptr->children[grandchild.ptr->child_count - 1];
+    grandchild.ptr->children[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child);
+    array_push(stack, tree);
+    tree = grandchild;
+  }
+
+  // Revisit the rotated trees in reverse order and re-run
+  // ts_subtree_set_children on each affected node, innermost first.
+  while (stack->size > initial_stack_size) {
+    tree = array_pop(stack);
+    MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]);
+    MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[child.ptr->child_count - 1]);
+    ts_subtree_set_children(grandchild, grandchild.ptr->children, grandchild.ptr->child_count, language);
+    ts_subtree_set_children(child, child.ptr->children, child.ptr->child_count, language);
+    ts_subtree_set_children(tree, tree.ptr->children, tree.ptr->child_count, language);
+  }
+}
+
+// Balance the repetition chains inside `self`. The tree is walked
+// iteratively, using the pool's `tree_stack` as the work list; only subtrees
+// that have children and a single reference are visited.
+void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *language) {
+  array_clear(&pool->tree_stack);
+
+  if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) {
+    array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
+  }
+
+  while (pool->tree_stack.size > 0) {
+    MutableSubtree tree = array_pop(&pool->tree_stack);
+
+    if (tree.ptr->repeat_depth > 0) {
+      Subtree child1 = tree.ptr->children[0];
+      Subtree child2 = tree.ptr->children[tree.ptr->child_count - 1];
+      // When the first child's repeat depth exceeds the last child's,
+      // compress the chain with counts that halve on each pass.
+      long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2);
+      if (repeat_delta > 0) {
+        unsigned n = repeat_delta;
+        for (unsigned i = n / 2; i > 0; i /= 2) {
+          ts_subtree__compress(tree, i, language, &pool->tree_stack);
+          n -= i;
+        }
+      }
+    }
+
+    // Queue every child that has children of its own and is not shared.
+    for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
+      Subtree child = tree.ptr->children[i];
+      if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) {
+        array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
+      }
+    }
+  }
+}
+
+// Installs `children` (`child_count` entries) as the child array of the heap
+// node `self` and recomputes every field of `self` that is derived from its
+// children: padding, size, lookahead bytes, error cost, visible/named child
+// counts, node count, dynamic precedence, external-token flag, fragility,
+// first-leaf information and repeat depth.
+//
+// `self` must not be an inline subtree. If `self` already had children stored in
+// a different array, that old array is freed; the subtrees it contained are not
+// released here.
+void ts_subtree_set_children(
+  MutableSubtree self, Subtree *children, uint32_t child_count, const TSLanguage *language
+) {
+  assert(!self.data.is_inline);
+
+  // Free the previous child array unless the caller is re-installing the same one.
+  if (self.ptr->child_count > 0 && children != self.ptr->children) {
+    ts_free(self.ptr->children);
+  }
+
+  // Reset all aggregates before re-accumulating them in the loop below.
+  self.ptr->child_count = child_count;
+  self.ptr->children = children;
+  self.ptr->named_child_count = 0;
+  self.ptr->visible_child_count = 0;
+  self.ptr->error_cost = 0;
+  self.ptr->repeat_depth = 0;
+  self.ptr->node_count = 1;
+  self.ptr->has_external_tokens = false;
+  self.ptr->dynamic_precedence = 0;
+
+  // `non_extra_index` counts only non-extra children; it is the index used to
+  // look up a child's alias in this production's alias sequence.
+  uint32_t non_extra_index = 0;
+  const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
+  uint32_t lookahead_end_byte = 0;
+
+  for (uint32_t i = 0; i < self.ptr->child_count; i++) {
+    Subtree child = self.ptr->children[i];
+
+    // The first child supplies the parent's padding and initial size; every
+    // later child extends the size by its own padding plus size.
+    if (i == 0) {
+      self.ptr->padding = ts_subtree_padding(child);
+      self.ptr->size = ts_subtree_size(child);
+    } else {
+      self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child));
+    }
+
+    // Track the furthest byte that any child looked ahead to, measured from the
+    // start of the parent's padding.
+    uint32_t child_lookahead_end_byte =
+      self.ptr->padding.bytes +
+      self.ptr->size.bytes +
+      ts_subtree_lookahead_bytes(child);
+    if (child_lookahead_end_byte > lookahead_end_byte) lookahead_end_byte = child_lookahead_end_byte;
+
+    // Children whose symbol is `ts_builtin_sym_error_repeat` do not contribute
+    // their error cost to the parent.
+    if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) {
+      self.ptr->error_cost += ts_subtree_error_cost(child);
+    }
+
+    self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child);
+    self.ptr->node_count += ts_subtree_node_count(child);
+
+    // Visible/named counting: an aliased non-extra child counts as visible (and
+    // as named if the alias symbol is named); otherwise a visible child counts
+    // itself, and an invisible child with children contributes its own counts.
+    if (alias_sequence && alias_sequence[non_extra_index] != 0 && !ts_subtree_extra(child)) {
+      self.ptr->visible_child_count++;
+      if (ts_language_symbol_metadata(language, alias_sequence[non_extra_index]).named) {
+        self.ptr->named_child_count++;
+      }
+    } else if (ts_subtree_visible(child)) {
+      self.ptr->visible_child_count++;
+      if (ts_subtree_named(child)) self.ptr->named_child_count++;
+    } else if (ts_subtree_child_count(child) > 0) {
+      self.ptr->visible_child_count += child.ptr->visible_child_count;
+      self.ptr->named_child_count += child.ptr->named_child_count;
+    }
+
+    if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true;
+
+    // An error child makes the parent fragile on both sides and clears its
+    // parse state.
+    if (ts_subtree_is_error(child)) {
+      self.ptr->fragile_left = self.ptr->fragile_right = true;
+      self.ptr->parse_state = TS_TREE_STATE_NONE;
+    }
+
+    if (!ts_subtree_extra(child)) non_extra_index++;
+  }
+
+  // Convert the absolute lookahead end into a count of bytes past the node's end.
+  self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes;
+
+  // Error and error-repeat nodes pay a fixed recovery cost, a cost per byte and
+  // per row of their size, and a cost per skipped child tree.
+  if (self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error_repeat) {
+    self.ptr->error_cost +=
+      ERROR_COST_PER_RECOVERY +
+      ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes +
+      ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row;
+    for (uint32_t i = 0; i < self.ptr->child_count; i++) {
+      Subtree child = self.ptr->children[i];
+      uint32_t grandchild_count = ts_subtree_child_count(child);
+      // Extras and childless error leaves are not charged as skipped trees.
+      if (ts_subtree_extra(child)) continue;
+      if (ts_subtree_is_error(child) && grandchild_count == 0) continue;
+      if (ts_subtree_visible(child)) {
+        self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
+      } else if (grandchild_count > 0) {
+        self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
+      }
+    }
+  }
+
+  if (self.ptr->child_count > 0) {
+    Subtree first_child = self.ptr->children[0];
+    Subtree last_child = self.ptr->children[self.ptr->child_count - 1];
+
+    self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child);
+    self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child);
+
+    // Fragility propagates inward from the outermost children.
+    if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true;
+    if (ts_subtree_fragile_right(last_child)) self.ptr->fragile_right = true;
+
+    // An invisible, unnamed node whose first child has the same symbol gets a
+    // repeat depth one greater than the deeper of its first and last child.
+    if (
+      self.ptr->child_count >= 2 &&
+      !self.ptr->visible &&
+      !self.ptr->named &&
+      ts_subtree_symbol(first_child) == self.ptr->symbol
+    ) {
+      if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) {
+        self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1;
+      } else {
+        self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1;
+      }
+    }
+  }
+}
+
+// Allocates a heap node for `symbol` from `pool`, initialises it with a
+// reference count of one, and adopts the contents of `children` as its child
+// array. Nodes for the built-in error and error-repeat symbols start out
+// fragile on both sides. All fields not listed in the initialiser are zeroed.
+MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
+                                   SubtreeArray *children, unsigned production_id,
+                                   const TSLanguage *language) {
+  TSSymbolMetadata symbol_info = ts_language_symbol_metadata(language, symbol);
+  bool is_error_symbol =
+    (symbol == ts_builtin_sym_error) ||
+    (symbol == ts_builtin_sym_error_repeat);
+
+  SubtreeHeapData *node = ts_subtree_pool_allocate(pool);
+  *node = (SubtreeHeapData) {
+    .ref_count = 1,
+    .symbol = symbol,
+    .visible = symbol_info.visible,
+    .named = symbol_info.named,
+    .fragile_left = is_error_symbol,
+    .fragile_right = is_error_symbol,
+    .has_changes = false,
+    .is_keyword = false,
+    .production_id = production_id,
+    .node_count = 0,
+    .first_leaf = {.symbol = 0, .parse_state = 0},
+  };
+
+  // The derived fields (size, counts, error cost, ...) are filled in here.
+  MutableSubtree created = {.ptr = node};
+  ts_subtree_set_children(created, children->contents, children->size, language);
+  return created;
+}
+
+Subtree ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children,
+                                  bool extra, const TSLanguage *language) {
+  MutableSubtree result = ts_subtree_new_node(
+    pool, ts_builtin_sym_error, children, 0, language
+  );
+  result.ptr->extra = extra;
+  return ts_subtree_from_mut(result);
+}
+
+// Creates a zero-sized leaf for `symbol` with the given `padding` and flags it
+// as missing, whichever representation (inline or heap) the leaf ended up in.
+Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding,
+                                    const TSLanguage *language) {
+  Subtree leaf = ts_subtree_new_leaf(
+    pool, symbol, padding, length_zero(), 0, 0, false, false, language
+  );
+
+  if (!leaf.data.is_inline) {
+    // The leaf was just created, so writing through the const pointer is safe.
+    SubtreeHeapData *heap_leaf = (SubtreeHeapData *)leaf.ptr;
+    heap_leaf->is_missing = true;
+    return leaf;
+  }
+
+  leaf.data.is_missing = true;
+  return leaf;
+}
+
+// Adds one reference to a heap subtree. Inline subtrees carry no reference
+// count and are left alone.
+void ts_subtree_retain(Subtree self) {
+  if (!self.data.is_inline) {
+    assert(self.ptr->ref_count > 0);
+    atomic_inc((volatile uint32_t *)&self.ptr->ref_count);
+    // A count of zero after incrementing would mean the counter wrapped.
+    assert(self.ptr->ref_count != 0);
+  }
+}
+
+// Drops one reference to `self`. When the count reaches zero the node is
+// destroyed, and the same is done for each heap child whose count reaches zero
+// in turn. The traversal is iterative: `pool->tree_stack` serves as the work
+// list, so it is cleared on entry. Inline subtrees are ignored.
+void ts_subtree_release(SubtreePool *pool, Subtree self) {
+  if (self.data.is_inline) return;
+  array_clear(&pool->tree_stack);
+
+  assert(self.ptr->ref_count > 0);
+  // Only queue the node for destruction if this was its last reference.
+  if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) {
+    array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
+  }
+
+  while (pool->tree_stack.size > 0) {
+    MutableSubtree tree = array_pop(&pool->tree_stack);
+    if (tree.ptr->child_count > 0) {
+      // Release each heap child; children that hit zero are queued as well.
+      for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
+        Subtree child = tree.ptr->children[i];
+        if (child.data.is_inline) continue;
+        assert(child.ptr->ref_count > 0);
+        if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) {
+          array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
+        }
+      }
+      ts_free(tree.ptr->children);
+    } else if (tree.ptr->has_external_tokens) {
+      // Childless nodes with external tokens own an external scanner state.
+      ts_external_scanner_state_delete(&tree.ptr->external_scanner_state);
+    }
+    // Hand the node's storage back to the pool.
+    ts_subtree_pool_free(pool, tree.ptr);
+  }
+}
+
+// Structural equality of two subtrees. If either one is inline, the raw inline
+// data is compared bytewise. Two null subtrees are equal; a null and a non-null
+// one are not. Heap nodes are compared by symbol, visibility, namedness and
+// byte sizes, then recursively by their children. Error nodes are compared by
+// their lookahead character instead of by children.
+bool ts_subtree_eq(Subtree self, Subtree other) {
+  if (self.data.is_inline || other.data.is_inline) {
+    return memcmp(&self, &other, sizeof(SubtreeInlineData)) == 0;
+  }
+
+  if (!self.ptr || !other.ptr) {
+    return !self.ptr && !other.ptr;
+  }
+
+  const SubtreeHeapData *lhs = self.ptr;
+  const SubtreeHeapData *rhs = other.ptr;
+
+  if (lhs->symbol != rhs->symbol) return false;
+  if (lhs->visible != rhs->visible) return false;
+  if (lhs->named != rhs->named) return false;
+  if (lhs->padding.bytes != rhs->padding.bytes) return false;
+  if (lhs->size.bytes != rhs->size.bytes) return false;
+
+  if (lhs->symbol == ts_builtin_sym_error) {
+    return lhs->lookahead_char == rhs->lookahead_char;
+  }
+
+  if (lhs->child_count != rhs->child_count) return false;
+  if (lhs->child_count == 0) return true;
+
+  // The child-count fields are only meaningful for nodes that have children.
+  if (lhs->visible_child_count != rhs->visible_child_count) return false;
+  if (lhs->named_child_count != rhs->named_child_count) return false;
+
+  for (uint32_t index = 0; index < lhs->child_count; index++) {
+    if (!ts_subtree_eq(lhs->children[index], rhs->children[index])) return false;
+  }
+  return true;
+}
+
+// Three-way ordering of two subtrees: first by symbol, then by child count,
+// then by the first pair of children that differ. Returns -1, 0 or 1.
+int ts_subtree_compare(Subtree left, Subtree right) {
+  TSSymbol left_symbol = ts_subtree_symbol(left);
+  TSSymbol right_symbol = ts_subtree_symbol(right);
+  if (left_symbol < right_symbol) return -1;
+  if (right_symbol < left_symbol) return 1;
+
+  uint32_t left_count = ts_subtree_child_count(left);
+  uint32_t right_count = ts_subtree_child_count(right);
+  if (left_count < right_count) return -1;
+  if (right_count < left_count) return 1;
+
+  // Both sides have the same number of children at this point.
+  for (uint32_t index = 0; index < left_count; index++) {
+    int order = ts_subtree_compare(left.ptr->children[index], right.ptr->children[index]);
+    if (order == -1 || order == 1) return order;
+  }
+  return 0;
+}
+
+// Sets the `has_changes` flag on whichever representation `self` uses.
+static inline void ts_subtree_set_has_changes(MutableSubtree *self) {
+  if (!self->data.is_inline) {
+    self->ptr->has_changes = true;
+    return;
+  }
+  self->data.has_changes = true;
+}
+
+// Applies the text edit described by `edit` to the subtree `self` and returns
+// the adjusted root. Every subtree touched by the edit is made mutable with
+// `ts_subtree_make_mut`, gets its padding and size adjusted, and is flagged as
+// having changes. The walk is iterative: an explicit stack holds pointers to the
+// subtree slots still to be processed, each paired with the edit expressed in
+// that subtree's own coordinate space.
+Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool) {
+  typedef struct {
+    Subtree *tree;
+    Edit edit;
+  } StackEntry;
+
+  // Seed the stack with the root. `entry.tree` points at the local `self`, so the
+  // updated root is what gets returned at the end.
+  Array(StackEntry) stack = array_new();
+  array_push(&stack, ((StackEntry) {
+    .tree = &self,
+    .edit = (Edit) {
+      .start = {edit->start_byte, edit->start_point},
+      .old_end = {edit->old_end_byte, edit->old_end_point},
+      .new_end = {edit->new_end_byte, edit->new_end_point},
+    },
+  }));
+
+  while (stack.size) {
+    StackEntry entry = array_pop(&stack);
+    // Note: this local `edit` shadows the function parameter of the same name.
+    Edit edit = entry.edit;
+    bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes;
+    bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes;
+
+    Length size = ts_subtree_size(*entry.tree);
+    Length padding = ts_subtree_padding(*entry.tree);
+    uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree);
+    uint32_t end_byte = padding.bytes + size.bytes + lookahead_bytes;
+    // Skip subtrees that end (including their lookahead) before the edit starts.
+    if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue;
+
+    // If the edit is entirely within the space before this subtree, then shift this
+    // subtree over according to the edit without changing its size.
+    if (edit.old_end.bytes <= padding.bytes) {
+      padding = length_add(edit.new_end, length_sub(padding, edit.old_end));
+    }
+
+    // If the edit starts in the space before this subtree and extends into this subtree,
+    // shrink the subtree's content to compensate for the change in the space before it.
+    else if (edit.start.bytes < padding.bytes) {
+      size = length_sub(size, length_sub(edit.old_end, padding));
+      padding = edit.new_end;
+    }
+
+    // If the edit is a pure insertion right at the start of the subtree,
+    // shift the subtree over according to the insertion.
+    else if (edit.start.bytes == padding.bytes && is_pure_insertion) {
+      padding = edit.new_end;
+    }
+
+    // If the edit is within this subtree, resize the subtree to reflect the edit.
+    else {
+      uint32_t total_bytes = padding.bytes + size.bytes;
+      if (edit.start.bytes < total_bytes ||
+         (edit.start.bytes == total_bytes && is_pure_insertion)) {
+        size = length_add(
+          length_sub(edit.new_end, padding),
+          length_sub(size, length_sub(edit.old_end, padding))
+        );
+      }
+    }
+
+    MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree);
+
+    if (result.data.is_inline) {
+      // Keep the inline representation if the new padding and size still fit it.
+      if (ts_subtree_can_inline(padding, size, lookahead_bytes)) {
+        result.data.padding_bytes = padding.bytes;
+        result.data.padding_rows = padding.extent.row;
+        result.data.padding_columns = padding.extent.column;
+        result.data.size_bytes = size.bytes;
+      } else {
+        // Otherwise promote the leaf to a heap node, carrying over the inline
+        // fields one by one.
+        SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
+        data->ref_count = 1;
+        data->padding = padding;
+        data->size = size;
+        data->lookahead_bytes = lookahead_bytes;
+        data->error_cost = 0;
+        data->child_count = 0;
+        data->symbol = result.data.symbol;
+        data->parse_state = result.data.parse_state;
+        data->visible = result.data.visible;
+        data->named = result.data.named;
+        data->extra = result.data.extra;
+        data->fragile_left = false;
+        data->fragile_right = false;
+        data->has_changes = false;
+        data->has_external_tokens = false;
+        data->is_missing = result.data.is_missing;
+        data->is_keyword = result.data.is_keyword;
+        result.ptr = data;
+      }
+    } else {
+      result.ptr->padding = padding;
+      result.ptr->size = size;
+    }
+
+    ts_subtree_set_has_changes(&result);
+    // Write the (possibly replaced) subtree back into the slot it came from.
+    *entry.tree = ts_subtree_from_mut(result);
+
+    // `child_left` / `child_right` are the child's start and end offsets within
+    // this subtree, accumulated from the children's total sizes.
+    Length child_left, child_right = length_zero();
+    for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) {
+      Subtree *child = &result.ptr->children[i];
+      Length child_size = ts_subtree_total_size(*child);
+      child_left = child_right;
+      child_right = length_add(child_left, child_size);
+
+      // If this child ends before the edit, it is not affected.
+      if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue;
+
+      // If this child starts after the edit, then we're done processing children.
+      if (child_left.bytes > edit.old_end.bytes ||
+          (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
+
+      // Transform edit into the child's coordinate space.
+      Edit child_edit = {
+        .start = length_sub(edit.start, child_left),
+        .old_end = length_sub(edit.old_end, child_left),
+        .new_end = length_sub(edit.new_end, child_left),
+      };
+
+      // Clamp child_edit to the child's bounds.
+      if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero();
+      if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero();
+      if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero();
+      if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size;
+
+      // Interpret all inserted text as applying to the *first* child that touches the edit.
+      // Subsequent children never have any text inserted into them; they are only
+      // shrunk to compensate for the edit.
+      if (child_right.bytes > edit.start.bytes ||
+          (child_right.bytes == edit.start.bytes && is_pure_insertion)) {
+        edit.new_end = edit.start;
+      }
+
+      // Children that occur before the edit are not reshaped by the edit.
+      else {
+        child_edit.old_end = child_edit.start;
+        child_edit.new_end = child_edit.start;
+      }
+
+      // Queue processing of this child's subtree.
+      array_push(&stack, ((StackEntry) {
+        .tree = child,
+        .edit = child_edit,
+      }));
+    }
+  }
+
+  array_delete(&stack);
+  return self;
+}
+
+// Descends from `tree` to its last leaf that carries external tokens, always
+// following the rightmost child flagged with `has_external_tokens`. Returns
+// NULL_SUBTREE when `tree` has no external tokens at all.
+Subtree ts_subtree_last_external_token(Subtree tree) {
+  if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE;
+
+  while (tree.ptr->child_count > 0) {
+    // Scan the children from right to left.
+    uint32_t remaining = tree.ptr->child_count;
+    while (remaining > 0) {
+      remaining--;
+      Subtree candidate = tree.ptr->children[remaining];
+      if (ts_subtree_has_external_tokens(candidate)) {
+        tree = candidate;
+        break;
+      }
+    }
+  }
+  return tree;
+}
+
+// Formats the character code `c` into `s` (capacity `n`) for diagnostics:
+// -1 becomes INVALID, NUL/newline/tab/carriage-return become quoted escapes,
+// printable ASCII becomes the quoted character, and anything else is written as
+// a decimal number. Returns what snprintf returned, i.e. the untruncated length.
+static size_t ts_subtree__write_char_to_string(char *s, size_t n, int32_t c) {
+  switch (c) {
+    case -1:
+      return snprintf(s, n, "INVALID");
+    case '\0':
+      return snprintf(s, n, "'\\0'");
+    case '\n':
+      return snprintf(s, n, "'\\n'");
+    case '\t':
+      return snprintf(s, n, "'\\t'");
+    case '\r':
+      return snprintf(s, n, "'\\r'");
+    default:
+      break;
+  }
+
+  // Only hand values in the ASCII range to isprint().
+  if (c > 0 && c < 128 && isprint(c)) {
+    return snprintf(s, n, "'%c'", c);
+  }
+  return snprintf(s, n, "%d", c);
+}
+
+// Writes `string` to `f` escaped for use inside a double-quoted DOT string:
+// double quotes become \", newlines become \n, and backslashes become \\.
+// Escaping the backslash matters because a name ending in a backslash would
+// otherwise escape the closing quote of the label and corrupt the graph file.
+static void ts_subtree__write_dot_string(FILE *f, const char *string) {
+  for (const char *c = string; *c; c++) {
+    switch (*c) {
+      case '"':
+        fputs("\\\"", f);
+        break;
+      case '\n':
+        fputs("\\n", f);
+        break;
+      case '\\':
+        fputs("\\\\", f);
+        break;
+      default:
+        fputc(*c, f);
+        break;
+    }
+  }
+}
+
+static const char *ROOT_FIELD = "__ROOT__";
+
+// Recursively writes the S-expression form of `self` and returns the number of
+// characters the full output needs (excluding the terminating NUL).
+//
+// The function works in two modes selected by `limit`:
+//   - limit == 0: nothing is stored. Every snprintf targets `string` with size
+//     0, so only the lengths are accumulated in `cursor` (measuring pass).
+//   - limit > 0: each snprintf writes at the advancing `cursor`. Note that the
+//     same `limit` is passed to every call and is not reduced as the cursor
+//     moves, so the caller must supply a buffer large enough for the whole
+//     output.
+//
+// `include_all` also prints nodes that would otherwise be hidden.
+// `alias_symbol` / `alias_is_named` describe the alias under which the parent
+// production exposes this node (0 for none). `field_name` is the field label to
+// print before the node; the sentinel ROOT_FIELD, compared by pointer, marks the
+// root call.
+static size_t ts_subtree__write_to_string(
+  Subtree self, char *string, size_t limit,
+  const TSLanguage *language, bool include_all,
+  TSSymbol alias_symbol, bool alias_is_named, const char *field_name
+) {
+  if (!self.ptr) return snprintf(string, limit, "(NULL)");
+
+  char *cursor = string;
+  // In measuring mode the write target stays fixed at `string`; otherwise it
+  // follows `cursor`.
+  char **writer = (limit > 0) ? &cursor : &string;
+  bool is_root = field_name == ROOT_FIELD;
+  // A node is printed if everything is requested, if it is a missing node, or if
+  // it is named (by its alias when aliased, otherwise by its own metadata).
+  bool is_visible =
+    include_all ||
+    ts_subtree_missing(self) ||
+    (
+      alias_symbol
+        ? alias_is_named
+        : ts_subtree_visible(self) && ts_subtree_named(self)
+    );
+
+  if (is_visible) {
+    // Non-root nodes are separated from what precedes them by a space and may
+    // carry a "field: " prefix.
+    if (!is_root) {
+      cursor += snprintf(*writer, limit, " ");
+      if (field_name) {
+        cursor += snprintf(*writer, limit, "%s: ", field_name);
+      }
+    }
+
+    // Childless, non-empty error nodes print the unexpected character instead
+    // of a symbol name.
+    if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) {
+      cursor += snprintf(*writer, limit, "(UNEXPECTED ");
+      cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char);
+    } else {
+      TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
+      const char *symbol_name = ts_language_symbol_name(language, symbol);
+      if (ts_subtree_missing(self)) {
+        cursor += snprintf(*writer, limit, "(MISSING ");
+        // Named symbols are printed bare, anonymous ones in double quotes.
+        if (alias_is_named || ts_subtree_named(self)) {
+          cursor += snprintf(*writer, limit, "%s", symbol_name);
+        } else {
+          cursor += snprintf(*writer, limit, "\"%s\"", symbol_name);
+        }
+      } else {
+        cursor += snprintf(*writer, limit, "(%s", symbol_name);
+      }
+    }
+  } else if (is_root) {
+    // An invisible root is still printed, as a quoted symbol name.
+    TSSymbol symbol = ts_subtree_symbol(self);
+    const char *symbol_name = ts_language_symbol_name(language, symbol);
+    cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name);
+  }
+
+  if (ts_subtree_child_count(self)) {
+    const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
+    const TSFieldMapEntry *field_map, *field_map_end;
+    ts_language_field_map(
+      language,
+      self.ptr->production_id,
+      &field_map,
+      &field_map_end
+    );
+
+    // `structural_child_index` counts non-extra children only; aliases and
+    // field map entries are indexed by it.
+    uint32_t structural_child_index = 0;
+    for (uint32_t i = 0; i < self.ptr->child_count; i++) {
+      Subtree child = self.ptr->children[i];
+      if (ts_subtree_extra(child)) {
+        // Extras never have an alias or a field name.
+        cursor += ts_subtree__write_to_string(
+          child, *writer, limit,
+          language, include_all,
+          0, false, NULL
+        );
+      } else {
+        TSSymbol alias_symbol = alias_sequence
+          ? alias_sequence[structural_child_index]
+          : 0;
+        bool alias_is_named = alias_symbol
+          ? ts_language_symbol_metadata(language, alias_symbol).named
+          : false;
+
+        // An invisible node passes its own field name down to its children; a
+        // non-inherited field map entry for this child takes precedence.
+        const char *child_field_name = is_visible ? NULL : field_name;
+        for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
+          if (!i->inherited && i->child_index == structural_child_index) {
+            child_field_name = language->field_names[i->field_id];
+            break;
+          }
+        }
+
+        cursor += ts_subtree__write_to_string(
+          child, *writer, limit,
+          language, include_all,
+          alias_symbol, alias_is_named, child_field_name
+        );
+        structural_child_index++;
+      }
+    }
+  }
+
+  if (is_visible) cursor += snprintf(*writer, limit, ")");
+
+  return cursor - string;
+}
+
+// Returns a newly malloc'ed, NUL-terminated S-expression describing `self`.
+// With `include_all` set, nodes that are normally hidden are printed as well.
+// The caller owns the returned buffer. Returns NULL if the allocation fails.
+char *ts_subtree_string(
+  Subtree self,
+  const TSLanguage *language,
+  bool include_all
+) {
+  // First pass: a limit of 0 stores nothing and only measures the output.
+  char scratch_string[1];
+  size_t size = ts_subtree__write_to_string(
+    self, scratch_string, 0,
+    language, include_all,
+    0, false, ROOT_FIELD
+  ) + 1;
+
+  char *result = malloc(size * sizeof(char));
+  // Do not let the second pass write through a null pointer.
+  if (!result) return NULL;
+
+  // Second pass: render into the exactly-sized buffer.
+  ts_subtree__write_to_string(
+    self, result, size,
+    language, include_all,
+    0, false, ROOT_FIELD
+  );
+  return result;
+}
+
+// Emits the DOT node statement for `*self`, then recurses into its children and
+// emits one edge per child. Nodes are identified by the address of their
+// `Subtree` slot. `start_offset` is the byte offset at which this subtree
+// (including its padding) begins; `alias_symbol`, when non-zero, replaces the
+// subtree's own symbol in the label.
+void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
+                                 const TSLanguage *language, TSSymbol alias_symbol,
+                                 FILE *f) {
+  TSSymbol subtree_symbol = ts_subtree_symbol(*self);
+  TSSymbol symbol = alias_symbol ? alias_symbol : subtree_symbol;
+  uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self);
+  // %p requires a pointer to void, hence the explicit casts.
+  fprintf(f, "tree_%p [label=\"", (const void *)self);
+  ts_subtree__write_dot_string(f, ts_language_symbol_name(language, symbol));
+  fprintf(f, "\"");
+
+  if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext");
+  if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray");
+
+  fprintf(f, ", tooltip=\""
+    "range: %u - %u\n"
+    "state: %d\n"
+    "error-cost: %u\n"
+    "has-changes: %u\n"
+    "repeat-depth: %u\n"
+    "lookahead-bytes: %u",
+    start_offset, end_offset,
+    ts_subtree_parse_state(*self),
+    ts_subtree_error_cost(*self),
+    ts_subtree_has_changes(*self),
+    ts_subtree_repeat_depth(*self),
+    ts_subtree_lookahead_bytes(*self)
+  );
+
+  // Childless error nodes also report the character that caused the error.
+  if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0) {
+    fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char);
+  }
+
+  fprintf(f, "\"]\n");
+
+  uint32_t child_start_offset = start_offset;
+  // Index of this production's first entry in the flat alias table. A value of
+  // zero doubles as "no aliases to look up".
+  uint32_t child_info_offset =
+    language->max_alias_sequence_length *
+    ts_subtree_production_id(*self);
+  for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) {
+    const Subtree *child = &self->ptr->children[i];
+    TSSymbol child_alias_symbol = 0;
+    // Only non-extra children consume an alias entry.
+    if (!ts_subtree_extra(*child) && child_info_offset) {
+      child_alias_symbol = language->alias_sequences[child_info_offset];
+      child_info_offset++;
+    }
+    ts_subtree__print_dot_graph(child, child_start_offset, language, child_alias_symbol, f);
+    fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (const void *)self, (const void *)child, i);
+    child_start_offset += ts_subtree_total_bytes(*child);
+  }
+}
+
+// Writes a complete DOT digraph for the subtree rooted at `self` to `f`.
+void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f) {
+  fputs("digraph tree {\n", f);
+  fputs("edge [arrowhead=none]\n", f);
+  // The root starts at byte offset 0 and has no alias.
+  ts_subtree__print_dot_graph(&self, 0, language, 0, f);
+  fputs("}\n", f);
+}
+
+// Compares the external scanner states of two subtrees. A subtree that is
+// null, has no external tokens, or has children is treated as carrying the
+// shared empty state.
+bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) {
+  bool self_has_state =
+    self.ptr && ts_subtree_has_external_tokens(self) && !self.ptr->child_count;
+  bool other_has_state =
+    other.ptr && ts_subtree_has_external_tokens(other) && !other.ptr->child_count;
+
+  const ExternalScannerState *lhs =
+    self_has_state ? &self.ptr->external_scanner_state : &empty_state;
+  const ExternalScannerState *rhs =
+    other_has_state ? &other.ptr->external_scanner_state : &empty_state;
+
+  return ts_external_scanner_state_eq(lhs, rhs);
+}
--- a/shlr/tree-sitter/lib/src/subtree.h
+++ b/shlr/tree-sitter/lib/src/subtree.h
@ -0,0 +1,285 @@
+#ifndef TREE_SITTER_SUBTREE_H_
+#define TREE_SITTER_SUBTREE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include "./length.h"
+#include "./array.h"
+#include "./error_costs.h"
+#include "tree_sitter/api.h"
+#include "tree_sitter/parser.h"
+
+static const TSStateId TS_TREE_STATE_NONE = USHRT_MAX;
+#define NULL_SUBTREE ((Subtree) {.ptr = NULL})
+
+typedef union Subtree Subtree;
+typedef union MutableSubtree MutableSubtree;
+
+// State of an external scanner, `length` bytes long.
+// NOTE(review): presumably the bytes live in `short_data` when they fit in 24
+// bytes and in a separate allocation behind `long_data` otherwise — confirm
+// against ts_external_scanner_state_init.
+typedef struct {
+  union {
+    char *long_data;
+    char short_data[24];
+  };
+  uint32_t length;
+} ExternalScannerState;
+
+// Compact representation of a leaf subtree that is stored directly inside the
+// `Subtree` union instead of in a heap node. Every accessor tests `is_inline`
+// to decide whether to read this struct or dereference the heap pointer.
+typedef struct {
+  bool is_inline : 1;
+  bool visible : 1;
+  bool named : 1;
+  bool extra : 1;
+  bool has_changes : 1;
+  bool is_missing : 1;
+  bool is_keyword : 1;
+  uint8_t symbol;
+  // The padding_* fields together form the Length returned by ts_subtree_padding.
+  uint8_t padding_bytes;
+  // ts_subtree_size reports this as both the byte count and the column count,
+  // with zero rows.
+  uint8_t size_bytes;
+  uint8_t padding_columns;
+  uint8_t padding_rows : 4;
+  uint8_t lookahead_bytes : 4;
+  uint16_t parse_state;
+} SubtreeInlineData;
+
+// Heap-allocated, reference-counted representation of a subtree. The trailing
+// anonymous union holds data that depends on the kind of node, as described on
+// each variant below.
+typedef struct {
+  // Changed through atomic_inc / atomic_dec in ts_subtree_retain / _release.
+  volatile uint32_t ref_count;
+  Length padding;
+  Length size;
+  uint32_t lookahead_bytes;
+  uint32_t error_cost;
+  uint32_t child_count;
+  TSSymbol symbol;
+  TSStateId parse_state;
+
+  bool visible : 1;
+  bool named : 1;
+  bool extra : 1;
+  bool fragile_left : 1;
+  bool fragile_right : 1;
+  bool has_changes : 1;
+  bool has_external_tokens : 1;
+  bool is_missing : 1;
+  bool is_keyword : 1;
+
+  union {
+    // Non-terminal subtrees (`child_count > 0`)
+    struct {
+      Subtree *children;
+      uint32_t visible_child_count;
+      uint32_t named_child_count;
+      uint32_t node_count;
+      uint32_t repeat_depth;
+      int32_t dynamic_precedence;
+      uint16_t production_id;
+      // Symbol and parse state of the leftmost leaf beneath this node.
+      struct {
+        TSSymbol symbol;
+        TSStateId parse_state;
+      } first_leaf;
+    };
+
+    // External terminal subtrees (`child_count == 0 && has_external_tokens`)
+    ExternalScannerState external_scanner_state;
+
+    // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
+    int32_t lookahead_char;
+  };
+} SubtreeHeapData;
+
+// Read-only handle to a subtree: either inline data (when `data.is_inline` is
+// set) or a pointer to a const heap node.
+union Subtree {
+  SubtreeInlineData data;
+  const SubtreeHeapData *ptr;
+};
+
+// Same layout as `Subtree`, but the heap node can be modified through `ptr`.
+union MutableSubtree {
+  SubtreeInlineData data;
+  SubtreeHeapData *ptr;
+};
+
+typedef Array(Subtree) SubtreeArray;
+typedef Array(MutableSubtree) MutableSubtreeArray;
+
+// Allocation pool for subtree heap nodes.
+typedef struct {
+  // NOTE(review): presumably released nodes kept for reuse — confirm against
+  // ts_subtree_pool_allocate / ts_subtree_pool_free.
+  MutableSubtreeArray free_trees;
+  // Scratch work list used by the iterative traversals in ts_subtree_release
+  // and ts_subtree_balance.
+  MutableSubtreeArray tree_stack;
+} SubtreePool;
+
+void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned);
+const char *ts_external_scanner_state_data(const ExternalScannerState *);
+
+void ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
+void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
+SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *);
+void ts_subtree_array_reverse(SubtreeArray *);
+
+SubtreePool ts_subtree_pool_new(uint32_t capacity);
+void ts_subtree_pool_delete(SubtreePool *);
+
+Subtree ts_subtree_new_leaf(
+  SubtreePool *, TSSymbol, Length, Length, uint32_t,
+  TSStateId, bool, bool, const TSLanguage *
+);
+Subtree ts_subtree_new_error(
+  SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
+);
+MutableSubtree ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
+Subtree ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, bool, const TSLanguage *);
+Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, const TSLanguage *);
+MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree);
+void ts_subtree_retain(Subtree);
+void ts_subtree_release(SubtreePool *, Subtree);
+bool ts_subtree_eq(Subtree, Subtree);
+int ts_subtree_compare(Subtree, Subtree);
+void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *);
+void ts_subtree_set_children(MutableSubtree, Subtree *, uint32_t, const TSLanguage *);
+void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *);
+Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *);
+char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all);
+void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *);
+Subtree ts_subtree_last_external_token(Subtree);
+bool ts_subtree_external_scanner_state_eq(Subtree, Subtree);
+
+// Accessors for fields that exist in both representations of a subtree. The
+// helper macro reads the field from the inline data when `is_inline` is set and
+// from the heap node otherwise; it is local to this group of functions.
+#define SUBTREE_FIELD(tree, field) \
+  ((tree).data.is_inline ? (tree).data.field : (tree).ptr->field)
+
+static inline TSSymbol ts_subtree_symbol(Subtree self) {
+  return SUBTREE_FIELD(self, symbol);
+}
+
+static inline bool ts_subtree_visible(Subtree self) {
+  return SUBTREE_FIELD(self, visible);
+}
+
+static inline bool ts_subtree_named(Subtree self) {
+  return SUBTREE_FIELD(self, named);
+}
+
+static inline bool ts_subtree_extra(Subtree self) {
+  return SUBTREE_FIELD(self, extra);
+}
+
+static inline bool ts_subtree_has_changes(Subtree self) {
+  return SUBTREE_FIELD(self, has_changes);
+}
+
+static inline bool ts_subtree_missing(Subtree self) {
+  return SUBTREE_FIELD(self, is_missing);
+}
+
+static inline bool ts_subtree_is_keyword(Subtree self) {
+  return SUBTREE_FIELD(self, is_keyword);
+}
+
+static inline TSStateId ts_subtree_parse_state(Subtree self) {
+  return SUBTREE_FIELD(self, parse_state);
+}
+
+static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) {
+  return SUBTREE_FIELD(self, lookahead_bytes);
+}
+
+#undef SUBTREE_FIELD
+
+// Sets the `extra` flag on whichever representation `self` uses.
+static inline void ts_subtree_set_extra(MutableSubtree *self) {
+  if (!self->data.is_inline) {
+    self->ptr->extra = true;
+    return;
+  }
+  self->data.extra = true;
+}
+
+// Symbol of the first leaf of `self`: the subtree's own symbol when it is a
+// leaf, otherwise the cached `first_leaf.symbol`.
+static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) {
+  if (!self.data.is_inline && self.ptr->child_count > 0) {
+    return self.ptr->first_leaf.symbol;
+  }
+  if (self.data.is_inline) {
+    return self.data.symbol;
+  }
+  return self.ptr->symbol;
+}
+
+// Parse state of the first leaf of `self`: the subtree's own parse state when
+// it is a leaf, otherwise the cached `first_leaf.parse_state`.
+static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) {
+  if (!self.data.is_inline && self.ptr->child_count > 0) {
+    return self.ptr->first_leaf.parse_state;
+  }
+  if (self.data.is_inline) {
+    return self.data.parse_state;
+  }
+  return self.ptr->parse_state;
+}
+
+// Padding that precedes the subtree's content. For inline subtrees the Length
+// is assembled from the packed padding fields.
+static inline Length ts_subtree_padding(Subtree self) {
+  if (!self.data.is_inline) {
+    return self.ptr->padding;
+  }
+
+  Length inline_padding;
+  inline_padding.bytes = self.data.padding_bytes;
+  inline_padding.extent.row = self.data.padding_rows;
+  inline_padding.extent.column = self.data.padding_columns;
+  return inline_padding;
+}
+
+// Size of the subtree's content, excluding padding. An inline subtree spans
+// zero rows, so its column extent equals its byte count.
+static inline Length ts_subtree_size(Subtree self) {
+  if (!self.data.is_inline) {
+    return self.ptr->size;
+  }
+
+  Length inline_size;
+  inline_size.bytes = self.data.size_bytes;
+  inline_size.extent.row = 0;
+  inline_size.extent.column = self.data.size_bytes;
+  return inline_size;
+}
+
+// Padding plus content size of the subtree.
+static inline Length ts_subtree_total_size(Subtree self) {
+  Length leading = ts_subtree_padding(self);
+  Length content = ts_subtree_size(self);
+  return length_add(leading, content);
+}
+
+// Byte count of the subtree including its padding.
+static inline uint32_t ts_subtree_total_bytes(Subtree self) {
+  Length total = ts_subtree_total_size(self);
+  return total.bytes;
+}
+
+// Number of children; inline subtrees never have any.
+static inline uint32_t ts_subtree_child_count(Subtree self) {
+  if (self.data.is_inline) return 0;
+  return self.ptr->child_count;
+}
+
+// Stored repeat depth of a heap subtree; 0 for inline subtrees.
+static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
+  if (self.data.is_inline) return 0;
+  return self.ptr->repeat_depth;
+}
+
+// Number of nodes in the subtree. Leaves (inline or childless) count as one.
+static inline uint32_t ts_subtree_node_count(Subtree self) {
+  if (self.data.is_inline) return 1;
+  if (self.ptr->child_count == 0) return 1;
+  return self.ptr->node_count;
+}
+
+// Stored visible-child count of a subtree that has children; 0 for leaves.
+static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
+  return ts_subtree_child_count(self) > 0 ? self.ptr->visible_child_count : 0;
+}
+
+static inline uint32_t ts_subtree_error_cost(Subtree self) {
+  if (ts_subtree_missing(self)) {
+    return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
+  } else {
+    return self.data.is_inline ? 0 : self.ptr->error_cost;
+  }
+}
+
+// Stored dynamic precedence of a subtree that has children; 0 for leaves.
+static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
+  if (self.data.is_inline) return 0;
+  if (self.ptr->child_count == 0) return 0;
+  return self.ptr->dynamic_precedence;
+}
+
+// Production id of a subtree that has children; 0 for leaves.
+static inline uint16_t ts_subtree_production_id(Subtree self) {
+  return ts_subtree_child_count(self) > 0 ? self.ptr->production_id : 0;
+}
+
+// `fragile_left` flag of a heap subtree; always false for inline subtrees.
+static inline bool ts_subtree_fragile_left(Subtree self) {
+  if (self.data.is_inline) return false;
+  return self.ptr->fragile_left;
+}
+
+// `fragile_right` flag of a heap subtree; always false for inline subtrees.
+static inline bool ts_subtree_fragile_right(Subtree self) {
+  if (self.data.is_inline) return false;
+  return self.ptr->fragile_right;
+}
+
+// `has_external_tokens` flag of a heap subtree; always false for inline subtrees.
+static inline bool ts_subtree_has_external_tokens(Subtree self) {
+  if (self.data.is_inline) return false;
+  return self.ptr->has_external_tokens;
+}
+
+// True when a heap subtree is fragile on either side; false for inline subtrees.
+static inline bool ts_subtree_is_fragile(Subtree self) {
+  if (self.data.is_inline) return false;
+  return self.ptr->fragile_left || self.ptr->fragile_right;
+}
+
+static inline bool ts_subtree_is_error(Subtree self) {
+  return ts_subtree_symbol(self) == ts_builtin_sym_error;
+}
+
+// True when the subtree's symbol is the built-in end-of-input symbol.
+static inline bool ts_subtree_is_eof(Subtree self) {
+  TSSymbol symbol = ts_subtree_symbol(self);
+  return symbol == ts_builtin_sym_end;
+}
+
+// Converts a mutable handle into a read-only one by copying the `data` member
+// of the union.
+// NOTE(review): this relies on `data` being at least as large as `ptr`, so that
+// the copy also carries the heap pointer — confirm for all target platforms.
+static inline Subtree ts_subtree_from_mut(MutableSubtree self) {
+  Subtree result;
+  result.data = self.data;
+  return result;
+}
+
+// Reinterprets a read-only handle as a mutable one without copying the node or
+// checking its reference count. In the visible callers it is used on nodes
+// whose count is 1 or has just dropped to 0.
+static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) {
+  MutableSubtree result;
+  result.data = self.data;
+  return result;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_SUBTREE_H_
--- a/shlr/tree-sitter/lib/src/tree.c
+++ b/shlr/tree-sitter/lib/src/tree.c
@ -0,0 +1,148 @@
+#include "tree_sitter/api.h"
+#include "./array.h"
+#include "./get_changed_ranges.h"
+#include "./subtree.h"
+#include "./tree_cursor.h"
+#include "./tree.h"
+
+// Number of entries in a tree's parent cache. The cache is allocated with
+// this many slots and indexed modulo this value, i.e. used as a ring buffer.
+static const unsigned PARENT_CACHE_CAPACITY = 32;
+
+// Allocates and initializes a new tree.
+//
+// `root` is stored as-is; this function does not retain it, so a caller that
+// shares the subtree with another tree (see ts_tree_copy) must retain it
+// first. `included_ranges` is copied into a freshly allocated array owned by
+// the tree, so the caller keeps ownership of its own buffer. The parent cache
+// starts out unallocated and empty.
+//
+// Returns the new tree, to be released with ts_tree_delete.
+TSTree *ts_tree_new(
+  Subtree root, const TSLanguage *language,
+  const TSRange *included_ranges, unsigned included_range_count
+) {
+  TSTree *result = ts_malloc(sizeof(TSTree));
+  result->root = root;
+  result->language = language;
+  result->parent_cache = NULL;
+  result->parent_cache_start = 0;
+  result->parent_cache_size = 0;
+  result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange));
+  // memcpy requires valid pointers even for a zero-length copy, so skip the
+  // call when there is nothing to copy (the source may then be NULL).
+  if (included_range_count > 0) {
+    memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange));
+  }
+  result->included_range_count = included_range_count;
+  return result;
+}
+
+// Creates a second tree that shares this tree's root subtree and language
+// and gets its own copy of the included ranges.
+TSTree *ts_tree_copy(const TSTree *self) {
+  // The copy references the same root, so take an extra reference first.
+  ts_subtree_retain(self->root);
+  TSTree *copy = ts_tree_new(
+    self->root, self->language,
+    self->included_ranges, self->included_range_count
+  );
+  return copy;
+}
+
+// Releases the tree's root subtree and frees the included ranges, the parent
+// cache (if it was ever allocated) and the tree itself. NULL is accepted and
+// ignored.
+void ts_tree_delete(TSTree *self) {
+  if (self) {
+    // A temporary pool is needed only for the duration of the release.
+    SubtreePool pool = ts_subtree_pool_new(0);
+    ts_subtree_release(&pool, self->root);
+    ts_subtree_pool_delete(&pool);
+
+    ts_free(self->included_ranges);
+    if (self->parent_cache) {
+      ts_free(self->parent_cache);
+    }
+    ts_free(self);
+  }
+}
+
+// Builds the node for the tree's root subtree. The node's position is the
+// root's padding and it carries no alias symbol.
+TSNode ts_tree_root_node(const TSTree *self) {
+  Length root_position = ts_subtree_padding(self->root);
+  return ts_node_new(self, &self->root, root_position, 0);
+}
+
+// Returns the language pointer that was stored when the tree was created.
+const TSLanguage *ts_tree_language(const TSTree *self) {
+  const TSLanguage *language = self->language;
+  return language;
+}
+
+// Applies `edit` to the tree in place. First the included ranges that end at
+// or after the edit's old end are shifted to account for the edit; then the
+// root subtree is edited and the parent cache is emptied.
+void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
+  for (unsigned i = 0; i < self->included_range_count; i++) {
+    TSRange *range = &self->included_ranges[i];
+    // Ranges that end before the old end of the edit are left untouched.
+    if (range->end_byte >= edit->old_end_byte) {
+      // An end of UINT32_MAX is never shifted.
+      if (range->end_byte != UINT32_MAX) {
+        range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte);
+        range->end_point = point_add(
+          edit->new_end_point,
+          point_sub(range->end_point, edit->old_end_point)
+        );
+        // A shifted end that lands below new_end_byte is clamped to the
+        // maximum position (presumably this detects uint32 wraparound).
+        if (range->end_byte < edit->new_end_byte) {
+          range->end_byte = UINT32_MAX;
+          range->end_point = POINT_MAX;
+        }
+      }
+      // The start is shifted only when it also lies at or after the old end.
+      if (range->start_byte >= edit->old_end_byte) {
+        range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte);
+        range->start_point = point_add(
+          edit->new_end_point,
+          point_sub(range->start_point, edit->old_end_point)
+        );
+        // Same clamping as for the end of the range.
+        if (range->start_byte < edit->new_end_byte) {
+          range->start_byte = UINT32_MAX;
+          range->start_point = POINT_MAX;
+        }
+      }
+    }
+  }
+
+  SubtreePool pool = ts_subtree_pool_new(0);
+  self->root = ts_subtree_edit(self->root, edit, &pool);
+  // Cached parents refer to the previous root, so drop every cache entry.
+  self->parent_cache_start = 0;
+  self->parent_cache_size = 0;
+  ts_subtree_pool_delete(&pool);
+}
+
+// Computes the ranges that differ between two trees. The number of ranges is
+// written to `*count` and the array produced by
+// ts_subtree_get_changed_ranges is returned.
+TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uint32_t *count) {
+  // Differences between the two trees' included ranges feed the comparison.
+  TSRangeArray range_differences = array_new();
+  ts_range_array_get_changed_ranges(
+    self->included_ranges, self->included_range_count,
+    other->included_ranges, other->included_range_count,
+    &range_differences
+  );
+
+  // One cursor per tree, each starting at that tree's root node.
+  TreeCursor self_cursor = {NULL, array_new()};
+  TreeCursor other_cursor = {NULL, array_new()};
+  ts_tree_cursor_init(&self_cursor, ts_tree_root_node(self));
+  ts_tree_cursor_init(&other_cursor, ts_tree_root_node(other));
+
+  TSRange *changed_ranges;
+  *count = ts_subtree_get_changed_ranges(
+    &self->root, &other->root, &self_cursor, &other_cursor,
+    self->language, &range_differences, &changed_ranges
+  );
+
+  // Release the temporaries; only the result array outlives this call.
+  array_delete(&range_differences);
+  array_delete(&self_cursor.stack);
+  array_delete(&other_cursor.stack);
+  return changed_ranges;
+}
+
+// Forwards the tree's root subtree and language to
+// ts_subtree_print_dot_graph, which writes to `file`.
+void ts_tree_print_dot_graph(const TSTree *self, FILE *file) {
+  Subtree root = self->root;
+  ts_subtree_print_dot_graph(root, self->language, file);
+}
+
+// Looks up `node` in the tree's parent cache. On a hit the cached parent is
+// rebuilt as a node; on a miss a node built from NULL tree and subtree
+// pointers is returned.
+TSNode ts_tree_get_cached_parent(const TSTree *self, const TSNode *node) {
+  uint32_t offset = 0;
+  while (offset < self->parent_cache_size) {
+    // Entries live in a ring that begins at parent_cache_start.
+    uint32_t slot = (self->parent_cache_start + offset) % PARENT_CACHE_CAPACITY;
+    ParentCacheEntry *cached = &self->parent_cache[slot];
+    if (cached->child == node->id) {
+      return ts_node_new(self, cached->parent, cached->position, cached->alias_symbol);
+    }
+    offset++;
+  }
+  return ts_node_new(NULL, NULL, length_zero(), 0);
+}
+
+// Records `parent` as the parent of `node` in the tree's parent cache. The
+// cache is allocated on first use; once it holds PARENT_CACHE_CAPACITY
+// entries, each new entry takes the slot of the oldest one.
+void ts_tree_set_cached_parent(const TSTree *_self, const TSNode *node, const TSNode *parent) {
+  // The cache is mutated even though the tree is passed as const.
+  TSTree *self = (TSTree *)_self;
+  if (self->parent_cache == NULL) {
+    self->parent_cache = ts_calloc(PARENT_CACHE_CAPACITY, sizeof(ParentCacheEntry));
+  }
+
+  // The parent's position and alias symbol are unpacked from its context.
+  ParentCacheEntry new_entry = (ParentCacheEntry) {
+    .child = node->id,
+    .parent = (const Subtree *)parent->id,
+    .position = {
+      parent->context[0],
+      {parent->context[1], parent->context[2]}
+    },
+    .alias_symbol = parent->context[3],
+  };
+  uint32_t slot = (self->parent_cache_start + self->parent_cache_size) % PARENT_CACHE_CAPACITY;
+  self->parent_cache[slot] = new_entry;
+
+  if (self->parent_cache_size != PARENT_CACHE_CAPACITY) {
+    self->parent_cache_size++;
+  } else {
+    // Full: advance the start so the slot just written becomes the newest.
+    self->parent_cache_start++;
+  }
+}
--- a/shlr/tree-sitter/lib/src/tree.h
+++ b/shlr/tree-sitter/lib/src/tree.h
@ -0,0 +1,34 @@
+#ifndef TREE_SITTER_TREE_H_
+#define TREE_SITTER_TREE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// One remembered child-to-parent relationship in a tree's parent cache.
+typedef struct {
+  const Subtree *child;   // compared against a node's `id` during lookup
+  const Subtree *parent;  // subtree of the cached parent
+  Length position;        // position passed to ts_node_new for the parent
+  TSSymbol alias_symbol;  // alias symbol passed to ts_node_new for the parent
+} ParentCacheEntry;
+
+// A syntax tree: the root subtree, the language it belongs to, a small cache
+// of recently resolved parents and the included ranges stored with the tree.
+struct TSTree {
+  Subtree root;
+  const TSLanguage *language;
+  ParentCacheEntry *parent_cache;   // NULL until the first entry is cached
+  uint32_t parent_cache_start;      // ring-buffer index of the first entry
+  uint32_t parent_cache_size;       // number of valid entries
+  TSRange *included_ranges;         // array owned by the tree
+  unsigned included_range_count;    // number of elements in included_ranges
+};
+
+// Internal tree API. The two unnamed ts_tree_new parameters are the included
+// ranges and their count; ts_node_new takes the owning tree, the subtree, its
+// position and an alias symbol.
+TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned);
+TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol);
+// Parent-cache lookup and insertion; the setter takes (tree, node, parent).
+TSNode ts_tree_get_cached_parent(const TSTree *, const TSNode *);
+void ts_tree_set_cached_parent(const TSTree *, const TSNode *, const TSNode *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_TREE_H_
--- a/shlr/tree-sitter/lib/src/tree_cursor.c
+++ b/shlr/tree-sitter/lib/src/tree_cursor.c
@ -0,0 +1,367 @@
+#include "tree_sitter/api.h"
+#include "./alloc.h"
+#include "./tree_cursor.h"
+#include "./language.h"
+#include "./tree.h"
+
+// Iteration state for walking the direct children of one subtree.
+// NOTE: ts_tree_cursor_iterate_children initializes this struct positionally,
+// so the field order below must not change.
+typedef struct {
+  Subtree parent;                   // subtree whose children are visited
+  const TSTree *tree;
+  Length position;                  // position of the next child to yield
+  uint32_t child_index;             // index of the next child to yield
+  uint32_t structural_child_index;  // advanced only for non-extra children
+                                    // when an alias sequence is present
+  const TSSymbol *alias_sequence;   // parent's alias sequence, or NULL
+} CursorChildIterator;
+
+// CursorChildIterator
+
+// Creates an iterator over the children of the entry on top of the cursor's
+// stack. For an entry without children the iterator has a null parent, so
+// the first call to ts_tree_cursor_child_iterator_next returns false.
+static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) {
+  TreeCursorEntry *top = array_back(&self->stack);
+
+  // Start from the "no children" iterator and fill it in when needed.
+  CursorChildIterator iterator = {NULL_SUBTREE, self->tree, length_zero(), 0, 0, NULL};
+  if (ts_subtree_child_count(*top->subtree) > 0) {
+    iterator.parent = *top->subtree;
+    iterator.position = top->position;
+    iterator.alias_sequence = ts_language_alias_sequence(
+      self->tree->language,
+      top->subtree->ptr->production_id
+    );
+  }
+  return iterator;
+}
+
+// Yields the iterator's next child. On success `*result` describes the
+// child, `*visible` says whether it is visible (either by itself or through
+// an alias), the iterator moves on to the following child and true is
+// returned. Returns false, writing nothing, once the children are exhausted
+// or when the iterator has no parent.
+static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self,
+                                                      TreeCursorEntry *result,
+                                                      bool *visible) {
+  if (!self->parent.ptr) return false;
+  if (self->child_index == self->parent.ptr->child_count) return false;
+
+  const Subtree *child = &self->parent.ptr->children[self->child_index];
+  *result = (TreeCursorEntry) {
+    .subtree = child,
+    .position = self->position,
+    .child_index = self->child_index,
+    .structural_child_index = self->structural_child_index,
+  };
+
+  bool is_visible = ts_subtree_visible(*child);
+  bool is_extra = ts_subtree_extra(*child);
+  if (self->alias_sequence && !is_extra) {
+    // A non-zero alias makes an otherwise invisible child visible.
+    is_visible |= self->alias_sequence[self->structural_child_index];
+    self->structural_child_index++;
+  }
+  *visible = is_visible;
+
+  // Step over this child, then over the padding in front of the next one.
+  self->child_index++;
+  self->position = length_add(self->position, ts_subtree_size(*child));
+  if (self->child_index < self->parent.ptr->child_count) {
+    Subtree following = self->parent.ptr->children[self->child_index];
+    self->position = length_add(self->position, ts_subtree_padding(following));
+  }
+
+  return true;
+}
+
+// TSTreeCursor - lifecycle
+
+// Creates a cursor positioned on `node`. The public TSTreeCursor is
+// zero-initialized and then set up through its internal TreeCursor view.
+TSTreeCursor ts_tree_cursor_new(TSNode node) {
+  TSTreeCursor cursor = {NULL, NULL, {0, 0}};
+  TreeCursor *internal = (TreeCursor *)&cursor;
+  ts_tree_cursor_init(internal, node);
+  return cursor;
+}
+
+// Re-initializes an existing cursor so that it starts over at `node`.
+void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) {
+  TreeCursor *self = (TreeCursor *)_self;
+  ts_tree_cursor_init(self, node);
+}
+
+// Points the cursor at `node`: adopts the node's tree, empties the stack and
+// pushes a single entry describing the node.
+void ts_tree_cursor_init(TreeCursor *self, TSNode node) {
+  self->tree = node.tree;
+  array_clear(&self->stack);
+
+  TreeCursorEntry start_entry = {
+    .subtree = (const Subtree *)node.id,
+    .position = {
+      ts_node_start_byte(node),
+      ts_node_start_point(node)
+    },
+    .child_index = 0,
+    .structural_child_index = 0,
+  };
+  array_push(&self->stack, start_entry);
+}
+
+// Frees the cursor's entry stack. The TSTreeCursor object itself is not freed.
+void ts_tree_cursor_delete(TSTreeCursor *_self) {
+  TreeCursor *cursor = (TreeCursor *)_self;
+  array_delete(&cursor->stack);
+}
+
+// TSTreeCursor - walking the tree
+
+// Moves the cursor to the first visible child of the current entry,
+// descending through invisible children that themselves contain visible
+// children. Returns true when the cursor moved onto a visible child.
+bool ts_tree_cursor_goto_first_child(TSTreeCursor *_self) {
+  TreeCursor *self = (TreeCursor *)_self;
+
+  for (;;) {
+    bool descended = false;
+    bool visible;
+    TreeCursorEntry entry;
+    CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
+
+    // Scan the children until one is visible or one is worth descending into.
+    while (!descended && ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
+      if (visible) {
+        array_push(&self->stack, entry);
+        return true;
+      }
+
+      if (ts_subtree_visible_child_count(*entry.subtree) > 0) {
+        array_push(&self->stack, entry);
+        descended = true;
+      }
+    }
+
+    // Nothing visible here and nothing to descend into: give up.
+    if (!descended) return false;
+  }
+}
+
+// Moves the cursor to the first visible child whose end byte lies beyond
+// `goal_byte`, descending through invisible children where necessary.
+//
+// Returns the number of visible children that were skipped before the chosen
+// one, or -1 when no child qualifies; in that case the cursor's stack size is
+// restored to what it was on entry.
+int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *_self, uint32_t goal_byte) {
+  TreeCursor *self = (TreeCursor *)_self;
+  uint32_t initial_size = self->stack.size;
+  uint32_t visible_child_index = 0;
+
+  bool did_descend;
+  do {
+    did_descend = false;
+
+    bool visible;
+    TreeCursorEntry entry;
+    CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
+    while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
+      uint32_t end_byte = entry.position.bytes + ts_subtree_size(*entry.subtree).bytes;
+      bool at_goal = end_byte > goal_byte;
+      uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree);
+
+      if (at_goal) {
+        // A visible child that extends past the goal is the answer.
+        if (visible) {
+          array_push(&self->stack, entry);
+          return visible_child_index;
+        }
+
+        // An invisible child with visible children: continue the search
+        // among its children.
+        if (visible_child_count > 0) {
+          array_push(&self->stack, entry);
+          did_descend = true;
+          break;
+        }
+      } else if (visible) {
+        // Children ending at or before the goal only advance the index.
+        visible_child_index++;
+      } else {
+        visible_child_index += visible_child_count;
+      }
+    }
+  } while (did_descend);
+
+  // The search descended but found nothing at the deepest level: fall back
+  // to the next sibling of wherever it ended up.
+  if (self->stack.size > initial_size &&
+      ts_tree_cursor_goto_next_sibling((TSTreeCursor *)self)) {
+    return visible_child_index;
+  }
+
+  // Undo any descent so the cursor is left where it started.
+  self->stack.size = initial_size;
+  return -1;
+}
+
+// Moves the cursor to the next visible sibling of the current entry. The
+// search climbs through ancestors as long as they are invisible, and
+// descends into invisible later siblings that contain visible children.
+// Returns false, with the stack size restored, when no such sibling exists.
+bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *_self) {
+  TreeCursor *self = (TreeCursor *)_self;
+  uint32_t initial_size = self->stack.size;
+
+  while (self->stack.size > 1) {
+    // Pop an entry and rebuild an iterator over its parent's children,
+    // positioned exactly on the popped entry.
+    TreeCursorEntry entry = array_pop(&self->stack);
+    CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
+    iterator.child_index = entry.child_index;
+    iterator.structural_child_index = entry.structural_child_index;
+    iterator.position = entry.position;
+
+    // The first step re-yields the popped entry itself; it is only used to
+    // learn its visibility and to advance past it.
+    bool visible = false;
+    ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible);
+    // A visible entry other than the one the cursor started on is a visible
+    // ancestor; the search does not continue among its siblings.
+    if (visible && self->stack.size + 1 < initial_size) break;
+
+    while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
+      if (visible) {
+        array_push(&self->stack, entry);
+        return true;
+      }
+
+      // Invisible sibling with visible children: move to its first one.
+      if (ts_subtree_visible_child_count(*entry.subtree)) {
+        array_push(&self->stack, entry);
+        ts_tree_cursor_goto_first_child(_self);
+        return true;
+      }
+    }
+  }
+
+  // Nothing found: restore the stack size so the cursor stays in place.
+  self->stack.size = initial_size;
+  return false;
+}
+
+// Moves the cursor to the nearest ancestor that is visible or aliased by its
+// own parent. Returns false, leaving the cursor untouched, when there is none.
+bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
+  TreeCursor *self = (TreeCursor *)_self;
+
+  // `remaining` is the candidate ancestor's stack index plus one, so the
+  // bottom entry (index 0) is still examined before the counter hits zero.
+  for (unsigned remaining = self->stack.size - 1; remaining > 0; remaining--) {
+    unsigned index = remaining - 1;
+    TreeCursorEntry *candidate = &self->stack.contents[index];
+
+    bool aliased = false;
+    if (index > 0) {
+      TreeCursorEntry *above = &self->stack.contents[index - 1];
+      const TSSymbol *alias_sequence = ts_language_alias_sequence(
+        self->tree->language,
+        above->subtree->ptr->production_id
+      );
+      aliased = alias_sequence && alias_sequence[candidate->structural_child_index];
+    }
+
+    if (ts_subtree_visible(*candidate->subtree) || aliased) {
+      // Truncate the stack so that the candidate becomes the top entry.
+      self->stack.size = remaining;
+      return true;
+    }
+  }
+  return false;
+}
+
+// Builds the node for the entry on top of the cursor's stack. The alias
+// symbol is taken from the parent entry's alias sequence when one exists and
+// the current subtree is not an extra.
+TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
+  const TreeCursor *self = (const TreeCursor *)_self;
+  TreeCursorEntry *current = array_back(&self->stack);
+
+  TSSymbol alias = 0;
+  if (self->stack.size > 1) {
+    TreeCursorEntry *above = &self->stack.contents[self->stack.size - 2];
+    const TSSymbol *alias_sequence = ts_language_alias_sequence(
+      self->tree->language,
+      above->subtree->ptr->production_id
+    );
+    if (alias_sequence && !ts_subtree_extra(*current->subtree)) {
+      alias = alias_sequence[current->structural_child_index];
+    }
+  }
+
+  return ts_node_new(self->tree, current->subtree, current->position, alias);
+}
+
+// Returns the field id associated with the cursor's current node (0 when it
+// has none) and reports through the two out-parameters whether the node can
+// have later siblings, and later siblings carrying the same field.
+TSFieldId ts_tree_cursor_current_status(
+  const TSTreeCursor *_self,
+  bool *can_have_later_siblings,
+  bool *can_have_later_siblings_with_this_field
+) {
+  const TreeCursor *self = (const TreeCursor *)_self;
+  const unsigned top = self->stack.size - 1;
+  TSFieldId field_id = 0;
+  *can_have_later_siblings = false;
+  *can_have_later_siblings_with_this_field = false;
+
+  // Walk up the tree, visiting the current node and its invisible ancestors,
+  // because fields can refer to nodes through invisible *wrapper* nodes.
+  for (unsigned depth = top; depth > 0; depth--) {
+    TreeCursorEntry *entry = &self->stack.contents[depth];
+    TreeCursorEntry *parent_entry = &self->stack.contents[depth - 1];
+
+    // Stop walking up when a visible (or aliased) ancestor is found.
+    if (depth != top) {
+      if (ts_subtree_visible(*entry->subtree)) break;
+      const TSSymbol *alias_sequence = ts_language_alias_sequence(
+        self->tree->language,
+        parent_entry->subtree->ptr->production_id
+      );
+      if (alias_sequence && alias_sequence[entry->structural_child_index]) {
+        break;
+      }
+    }
+
+    if (ts_subtree_child_count(*parent_entry->subtree) > entry->child_index + 1) {
+      *can_have_later_siblings = true;
+    }
+
+    if (ts_subtree_extra(*entry->subtree)) break;
+
+    const TSFieldMapEntry *field_map, *field_map_end;
+    ts_language_field_map(
+      self->tree->language,
+      parent_entry->subtree->ptr->production_id,
+      &field_map, &field_map_end
+    );
+
+    // Look for a field name associated with the current node.
+    if (!field_id) {
+      for (const TSFieldMapEntry *field = field_map; field < field_map_end; field++) {
+        if (!field->inherited && field->child_index == entry->structural_child_index) {
+          field_id = field->field_id;
+          *can_have_later_siblings_with_this_field = false;
+          break;
+        }
+      }
+    }
+
+    // Determine if there are other later siblings with the same field name.
+    if (field_id) {
+      for (const TSFieldMapEntry *field = field_map; field < field_map_end; field++) {
+        if (field->field_id == field_id && field->child_index > entry->structural_child_index) {
+          *can_have_later_siblings_with_this_field = true;
+          break;
+        }
+      }
+    }
+  }
+
+  return field_id;
+}
+
+// Returns the id of the field through which the cursor's current node is
+// reached, or 0 when no field applies.
+TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
+  const TreeCursor *self = (const TreeCursor *)_self;
+  const unsigned top = self->stack.size - 1;
+
+  // Walk up the tree, visiting the current node and its invisible ancestors.
+  for (unsigned depth = top; depth > 0; depth--) {
+    TreeCursorEntry *entry = &self->stack.contents[depth];
+    TreeCursorEntry *parent_entry = &self->stack.contents[depth - 1];
+
+    // Stop walking up when another visible (or aliased) node is found.
+    if (depth != top) {
+      if (ts_subtree_visible(*entry->subtree)) break;
+      const TSSymbol *alias_sequence = ts_language_alias_sequence(
+        self->tree->language,
+        parent_entry->subtree->ptr->production_id
+      );
+      if (alias_sequence && alias_sequence[entry->structural_child_index]) {
+        break;
+      }
+    }
+
+    if (ts_subtree_extra(*entry->subtree)) break;
+
+    const TSFieldMapEntry *field_map, *field_map_end;
+    ts_language_field_map(
+      self->tree->language,
+      parent_entry->subtree->ptr->production_id,
+      &field_map, &field_map_end
+    );
+
+    // The first non-inherited mapping for this child position wins.
+    for (const TSFieldMapEntry *field = field_map; field < field_map_end; field++) {
+      if (!field->inherited && field->child_index == entry->structural_child_index) {
+        return field->field_id;
+      }
+    }
+  }
+  return 0;
+}
+
+// Returns the name of the current node's field, looked up in the language's
+// field-name table, or NULL when the node has no field.
+const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) {
+  TSFieldId id = ts_tree_cursor_current_field_id(_self);
+  if (!id) return NULL;
+
+  const TreeCursor *self = (const TreeCursor *)_self;
+  return self->tree->language->field_names[id];
+}
+
+// Returns an independent copy of a cursor: same tree, and a newly filled
+// stack holding the same entries.
+TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) {
+  const TreeCursor *source = (const TreeCursor *)_cursor;
+
+  // Start from an empty cursor so the push below begins with an empty stack.
+  TSTreeCursor duplicate = {NULL, NULL, {0, 0}};
+  TreeCursor *target = (TreeCursor *)&duplicate;
+  target->tree = source->tree;
+  array_push_all(&target->stack, &source->stack);
+  return duplicate;
+}
--- a/shlr/tree-sitter/lib/src/tree_cursor.h
+++ b/shlr/tree-sitter/lib/src/tree_cursor.h
@ -0,0 +1,21 @@
+#ifndef TREE_SITTER_TREE_CURSOR_H_
+#define TREE_SITTER_TREE_CURSOR_H_
+
+#include "./subtree.h"
+
+// One level of a tree cursor's stack: a subtree together with its position
+// and its indices within the parent's children.
+typedef struct {
+  const Subtree *subtree;
+  Length position;                  // position of the subtree
+  uint32_t child_index;             // index among all of the parent's children
+  uint32_t structural_child_index;  // index used to look up the parent's
+                                    // alias sequence and field map
+} TreeCursorEntry;
+
+// Internal representation of a tree cursor: the tree being walked plus a
+// stack of entries whose last element is the current position.
+// NOTE(review): tree_cursor.c casts the public TSTreeCursor to this type, so
+// the two layouts presumably have to stay compatible — confirm against api.h.
+typedef struct {
+  const TSTree *tree;
+  Array(TreeCursorEntry) stack;
+} TreeCursor;
+
+// Internal cursor API: (re)positions a cursor on a node, and queries the
+// current node's field id together with two "can have later siblings" flags.
+void ts_tree_cursor_init(TreeCursor *, TSNode);
+TSFieldId ts_tree_cursor_current_status(const TSTreeCursor *, bool *, bool *);
+
+#endif  // TREE_SITTER_TREE_CURSOR_H_
--- a/shlr/tree-sitter/lib/src/unicode.h
+++ b/shlr/tree-sitter/lib/src/unicode.h
@ -0,0 +1,50 @@
+#ifndef TREE_SITTER_UNICODE_H_
+#define TREE_SITTER_UNICODE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <limits.h>
+#include <stdint.h>
+
+#define U_EXPORT
+#define U_EXPORT2
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+
+// Named constant for ICU's U_SENTINEL value.
+// NOTE(review): presumably the code point reported by the decode functions
+// for malformed input — confirm against the ICU U8_NEXT/U16_NEXT macros.
+static const int32_t TS_DECODE_ERROR = U_SENTINEL;
+
+// These functions read one Unicode code point from the given string,
+// returning the number of bytes consumed. The decoded value is written
+// through `code_point`.
+typedef uint32_t (*UnicodeDecodeFunction)(
+  const uint8_t *string,
+  uint32_t length,
+  int32_t *code_point
+);
+
+// Decodes one code point from a UTF-8 buffer using ICU's U8_NEXT macro.
+// The code point is stored in `*code_point`; the return value is the number
+// of bytes the macro advanced.
+static inline uint32_t ts_decode_utf8(
+  const uint8_t *string,
+  uint32_t length,
+  int32_t *code_point
+) {
+  uint32_t consumed = 0;
+  U8_NEXT(string, consumed, length, *code_point);
+  return consumed;
+}
+
+// Decodes one code point from a UTF-16 buffer using ICU's U16_NEXT macro.
+// The code point is stored in `*code_point`. U16_NEXT advances its index in
+// 16-bit units, so the result is doubled to report bytes consumed.
+static inline uint32_t ts_decode_utf16(
+  const uint8_t *string,
+  uint32_t length,
+  int32_t *code_point
+) {
+  uint32_t i = 0;
+  // The buffer is only read, so keep it const when viewing it as 16-bit units.
+  U16_NEXT(((const uint16_t *)string), i, length, *code_point);
+  return i * 2;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_UNICODE_H_
--- a/shlr/tree-sitter/lib/src/unicode/ICU_SHA
+++ b/shlr/tree-sitter/lib/src/unicode/ICU_SHA
@ -0,0 +1 @@
+552b01f61127d30d6589aa4bf99468224979b661
--- a/shlr/tree-sitter/lib/src/unicode/LICENSE
+++ b/shlr/tree-sitter/lib/src/unicode/LICENSE
@ -0,0 +1,414 @@
+COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
+
+Copyright © 1991-2019 Unicode, Inc. All rights reserved.
+Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Unicode data files and any associated documentation
+(the "Data Files") or Unicode software and any associated documentation
+(the "Software") to deal in the Data Files or Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of
+the Data Files or Software, and to permit persons to whom the Data Files
+or Software are furnished to do so, provided that either
+(a) this copyright and permission notice appear with all copies
+of the Data Files or Software, or
+(b) this copyright and permission notice appear in associated
+Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale,
+use or other dealings in these Data Files or Software without prior
+written authorization of the copyright holder.
+
+---------------------
+
+Third-Party Software Licenses
+
+This section contains third-party software notices and/or additional
+terms for licensed third-party software components included within ICU
+libraries.
+
+1. ICU License - ICU 1.8.1 to ICU 57.1
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (c) 1995-2016 International Business Machines Corporation and others
+All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, and/or sell copies of the Software, and to permit persons
+to whom the Software is furnished to do so, provided that the above
+copyright notice(s) and this permission notice appear in all copies of
+the Software and that both the above copyright notice(s) and this
+permission notice appear in supporting documentation.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY
+SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
+RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
+CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale, use
+or other dealings in this Software without prior written authorization
+of the copyright holder.
+
+All trademarks and registered trademarks mentioned herein are the
+property of their respective owners.
+
+2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
+
+ #     The Google Chrome software developed by Google is licensed under
+ # the BSD license. Other software included in this distribution is
+ # provided under other licenses, as set forth below.
+ #
+ #  The BSD License
+ #  http://opensource.org/licenses/bsd-license.php
+ #  Copyright (C) 2006-2008, Google Inc.
+ #
+ #  All rights reserved.
+ #
+ #  Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions are met:
+ #
+ #  Redistributions of source code must retain the above copyright notice,
+ # this list of conditions and the following disclaimer.
+ #  Redistributions in binary form must reproduce the above
+ # copyright notice, this list of conditions and the following
+ # disclaimer in the documentation and/or other materials provided with
+ # the distribution.
+ #  Neither the name of  Google Inc. nor the names of its
+ # contributors may be used to endorse or promote products derived from
+ # this software without specific prior written permission.
+ #
+ #
+ #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ #
+ #
+ #  The word list in cjdict.txt are generated by combining three word lists
+ # listed below with further processing for compound word breaking. The
+ # frequency is generated with an iterative training against Google web
+ # corpora.
+ #
+ #  * Libtabe (Chinese)
+ #    - https://sourceforge.net/project/?group_id=1519
+ #    - Its license terms and conditions are shown below.
+ #
+ #  * IPADIC (Japanese)
+ #    - http://chasen.aist-nara.ac.jp/chasen/distribution.html
+ #    - Its license terms and conditions are shown below.
+ #
+ #  ---------COPYING.libtabe ---- BEGIN--------------------
+ #
+ #  /*
+ #   * Copyright (c) 1999 TaBE Project.
+ #   * Copyright (c) 1999 Pai-Hsiang Hsiao.
+ #   * All rights reserved.
+ #   *
+ #   * Redistribution and use in source and binary forms, with or without
+ #   * modification, are permitted provided that the following conditions
+ #   * are met:
+ #   *
+ #   * . Redistributions of source code must retain the above copyright
+ #   *   notice, this list of conditions and the following disclaimer.
+ #   * . Redistributions in binary form must reproduce the above copyright
+ #   *   notice, this list of conditions and the following disclaimer in
+ #   *   the documentation and/or other materials provided with the
+ #   *   distribution.
+ #   * . Neither the name of the TaBE Project nor the names of its
+ #   *   contributors may be used to endorse or promote products derived
+ #   *   from this software without specific prior written permission.
+ #   *
+ #   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ #   * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ #   * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ #   * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ #   * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ #   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ #   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ #   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ #   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ #   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ #   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ #   * OF THE POSSIBILITY OF SUCH DAMAGE.
+ #   */
+ #
+ #  /*
+ #   * Copyright (c) 1999 Computer Systems and Communication Lab,
+ #   *                    Institute of Information Science, Academia
+ #       *                    Sinica. All rights reserved.
+ #   *
+ #   * Redistribution and use in source and binary forms, with or without
+ #   * modification, are permitted provided that the following conditions
+ #   * are met:
+ #   *
+ #   * . Redistributions of source code must retain the above copyright
+ #   *   notice, this list of conditions and the following disclaimer.
+ #   * . Redistributions in binary form must reproduce the above copyright
+ #   *   notice, this list of conditions and the following disclaimer in
+ #   *   the documentation and/or other materials provided with the
+ #   *   distribution.
+ #   * . Neither the name of the Computer Systems and Communication Lab
+ #   *   nor the names of its contributors may be used to endorse or
+ #   *   promote products derived from this software without specific
+ #   *   prior written permission.
+ #   *
+ #   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ #   * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ #   * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ #   * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ #   * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ #   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ #   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ #   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ #   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ #   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ #   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ #   * OF THE POSSIBILITY OF SUCH DAMAGE.
+ #   */
+ #
+ #  Copyright 1996 Chih-Hao Tsai @ Beckman Institute,
+ #      University of Illinois
+ #  c-tsai4@uiuc.edu  http://casper.beckman.uiuc.edu/~c-tsai4
+ #
+ #  ---------------COPYING.libtabe-----END--------------------------------
+ #
+ #
+ #  ---------------COPYING.ipadic-----BEGIN-------------------------------
+ #
+ #  Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
+ #  and Technology.  All Rights Reserved.
+ #
+ #  Use, reproduction, and distribution of this software is permitted.
+ #  Any copy of this software, whether in its original form or modified,
+ #  must include both the above copyright notice and the following
+ #  paragraphs.
+ #
+ #  Nara Institute of Science and Technology (NAIST),
+ #  the copyright holders, disclaims all warranties with regard to this
+ #  software, including all implied warranties of merchantability and
+ #  fitness, in no event shall NAIST be liable for
+ #  any special, indirect or consequential damages or any damages
+ #  whatsoever resulting from loss of use, data or profits, whether in an
+ #  action of contract, negligence or other tortuous action, arising out
+ #  of or in connection with the use or performance of this software.
+ #
+ #  A large portion of the dictionary entries
+ #  originate from ICOT Free Software.  The following conditions for ICOT
+ #  Free Software applies to the current dictionary as well.
+ #
+ #  Each User may also freely distribute the Program, whether in its
+ #  original form or modified, to any third party or parties, PROVIDED
+ #  that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
+ #  on, or be attached to, the Program, which is distributed substantially
+ #  in the same form as set out herein and that such intended
+ #  distribution, if actually made, will neither violate or otherwise
+ #  contravene any of the laws and regulations of the countries having
+ #  jurisdiction over the User or the intended distribution itself.
+ #
+ #  NO WARRANTY
+ #
+ #  The program was produced on an experimental basis in the course of the
+ #  research and development conducted during the project and is provided
+ #  to users as so produced on an experimental basis.  Accordingly, the
+ #  program is provided without any warranty whatsoever, whether express,
+ #  implied, statutory or otherwise.  The term "warranty" used herein
+ #  includes, but is not limited to, any warranty of the quality,
+ #  performance, merchantability and fitness for a particular purpose of
+ #  the program and the nonexistence of any infringement or violation of
+ #  any right of any third party.
+ #
+ #  Each user of the program will agree and understand, and be deemed to
+ #  have agreed and understood, that there is no warranty whatsoever for
+ #  the program and, accordingly, the entire risk arising from or
+ #  otherwise connected with the program is assumed by the user.
+ #
+ #  Therefore, neither ICOT, the copyright holder, or any other
+ #  organization that participated in or was otherwise related to the
+ #  development of the program and their respective officials, directors,
+ #  officers and other employees shall be held liable for any and all
+ #  damages, including, without limitation, general, special, incidental
+ #  and consequential damages, arising out of or otherwise in connection
+ #  with the use or inability to use the program or any product, material
+ #  or result produced or otherwise obtained by using the program,
+ #  regardless of whether they have been advised of, or otherwise had
+ #  knowledge of, the possibility of such damages at any time during the
+ #  project or thereafter.  Each user will be deemed to have agreed to the
+ #  foregoing by his or her commencement of use of the program.  The term
+ #  "use" as used herein includes, but is not limited to, the use,
+ #  modification, copying and distribution of the program and the
+ #  production of secondary products from the program.
+ #
+ #  In the case where the program, whether in its original form or
+ #  modified, was distributed or delivered to or received by a user from
+ #  any person, organization or entity other than ICOT, unless it makes or
+ #  grants independently of ICOT any specific warranty to the user in
+ #  writing, such person, organization or entity, will also be exempted
+ #  from and not be held liable to the user for any such damages as noted
+ #  above as far as the program is concerned.
+ #
+ #  ---------------COPYING.ipadic-----END----------------------------------
+
+3. Lao Word Break Dictionary Data (laodict.txt)
+
+ #  Copyright (c) 2013 International Business Machines Corporation
+ #  and others. All Rights Reserved.
+ #
+ # Project: http://code.google.com/p/lao-dictionary/
+ # Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt
+ # License: http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt
+ #              (copied below)
+ #
+ #  This file is derived from the above dictionary, with slight
+ #  modifications.
+ #  ----------------------------------------------------------------------
+ #  Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
+ #  All rights reserved.
+ #
+ #  Redistribution and use in source and binary forms, with or without
+ #  modification,
+ #  are permitted provided that the following conditions are met:
+ #
+ #
+ # Redistributions of source code must retain the above copyright notice, this
+ #  list of conditions and the following disclaimer. Redistributions in
+ #  binary form must reproduce the above copyright notice, this list of
+ #  conditions and the following disclaimer in the documentation and/or
+ #  other materials provided with the distribution.
+ #
+ #
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ # OF THE POSSIBILITY OF SUCH DAMAGE.
+ #  --------------------------------------------------------------------------
+
+4. Burmese Word Break Dictionary Data (burmesedict.txt)
+
+ #  Copyright (c) 2014 International Business Machines Corporation
+ #  and others. All Rights Reserved.
+ #
+ #  This list is part of a project hosted at:
+ #    github.com/kanyawtech/myanmar-karen-word-lists
+ #
+ #  --------------------------------------------------------------------------
+ #  Copyright (c) 2013, LeRoy Benjamin Sharon
+ #  All rights reserved.
+ #
+ #  Redistribution and use in source and binary forms, with or without
+ #  modification, are permitted provided that the following conditions
+ #  are met: Redistributions of source code must retain the above
+ #  copyright notice, this list of conditions and the following
+ #  disclaimer.  Redistributions in binary form must reproduce the
+ #  above copyright notice, this list of conditions and the following
+ #  disclaimer in the documentation and/or other materials provided
+ #  with the distribution.
+ #
+ #    Neither the name Myanmar Karen Word Lists, nor the names of its
+ #    contributors may be used to endorse or promote products derived
+ #    from this software without specific prior written permission.
+ #
+ #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ #  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ #  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ #  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ #  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ #  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ #  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ #  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ #  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ #  TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ #  THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ #  SUCH DAMAGE.
+ #  --------------------------------------------------------------------------
+
+5. Time Zone Database
+
+  ICU uses the public domain data and code derived from Time Zone
+Database for its time zone support. The ownership of the TZ database
+is explained in BCP 175: Procedure for Maintaining the Time Zone
+Database section 7.
+
+ # 7.  Database Ownership
+ #
+ #    The TZ database itself is not an IETF Contribution or an IETF
+ #    document.  Rather it is a pre-existing and regularly updated work
+ #    that is in the public domain, and is intended to remain in the
+ #    public domain.  Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do
+ #    not apply to the TZ Database or contributions that individuals make
+ #    to it.  Should any claims be made and substantiated against the TZ
+ #    Database, the organization that is providing the IANA
+ #    Considerations defined in this RFC, under the memorandum of
+ #    understanding with the IETF, currently ICANN, may act in accordance
+ #    with all competent court orders.  No ownership claims will be made
+ #    by ICANN or the IETF Trust on the database or the code.  Any person
+ #    making a contribution to the database or code waives all rights to
+ #    future claims in that contribution or in the TZ Database.
+
+6. Google double-conversion
+
+Copyright 2006-2011, the V8 project authors. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+    * Neither the name of Google Inc. nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/shlr/tree-sitter/lib/src/unicode/README.md
+++ b/shlr/tree-sitter/lib/src/unicode/README.md
@ -0,0 +1,29 @@
+# ICU Parts
+
+This directory contains a small subset of files from the Unicode organization's [ICU repository](https://github.com/unicode-org/icu).
+
+### License
+
+The license for these files is contained in the `LICENSE` file within this directory.
+
+### Contents
+
+* Source files taken from the [`icu4c/source/common/unicode`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c/source/common/unicode) directory:
+  * `utf8.h`
+  * `utf16.h`
+  * `umachine.h`
+* Stub source files that are referenced by the above source files, but whose original contents in `libicu` are not needed (each stub contains only a one-line explanatory comment):
+  * `ptypes.h`
+  * `urename.h`
+  * `utf.h`
+* `ICU_SHA` - File containing the Git SHA of the commit in the `icu` repository from which the files were obtained.
+* `LICENSE` - The license file from the [`icu4c`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c) directory of the `icu` repository.
+* `README.md` - This text file.
+
+### Updating ICU
+
+To incorporate changes from the upstream `icu` repository:
+
+* Update `ICU_SHA` with the new Git SHA.
+* Update `LICENSE` with the license text from the directory mentioned above.
+* Update `utf8.h`, `utf16.h`, and `umachine.h` with their new contents in the `icu` repository.
--- a/shlr/tree-sitter/lib/src/unicode/ptypes.h
+++ b/shlr/tree-sitter/lib/src/unicode/ptypes.h
@ -0,0 +1 @@
+// This file must exist in order for `utf8.h` and `utf16.h` to be used.
--- a/shlr/tree-sitter/lib/src/unicode/umachine.h
+++ b/shlr/tree-sitter/lib/src/unicode/umachine.h
@ -0,0 +1,448 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 1999-2015, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  umachine.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep13
+*   created by: Markus W. Scherer
+*
+*   This file defines basic types and constants for ICU to be
+*   platform-independent. umachine.h and utf.h are included into
+*   utypes.h to provide all the general definitions for ICU.
+*   All of these definitions used to be in utypes.h before
+*   the UTF-handling macros made this unmaintainable.
+*/
+
+#ifndef __UMACHINE_H__
+#define __UMACHINE_H__
+
+
+/**
+ * \file
+ * \brief Basic types and constants for UTF
+ *
+ * <h2> Basic types and constants for UTF </h2>
+ *   This file defines basic types and constants for utf.h to be
+ *   platform-independent. umachine.h and utf.h are included into
+ *   utypes.h to provide all the general definitions for ICU.
+ *   All of these definitions used to be in utypes.h before
+ *   the UTF-handling macros made this unmaintainable.
+ *
+ */
+/*==========================================================================*/
+/* Include platform-dependent definitions                                   */
+/* which are contained in the platform-specific file platform.h             */
+/*==========================================================================*/
+
+#include "unicode/ptypes.h" /* upstream ICU reaches platform.h through ptypes.h; the ptypes.h vendored beside this file is a comment-only stub */
+
+/*
+ * ANSI C headers:
+ * stddef.h defines wchar_t
+ *
+ * NOTE: with the stub ptypes.h, nothing included by this header is
+ * guaranteed to declare the fixed-width integer types (int8_t, uint16_t,
+ * int32_t, ...) used below. The including file has to make them visible
+ * (for example through <stdint.h>) before it includes this header.
+ */
+#include <stddef.h>
+
+/*==========================================================================*/
+/* For C wrappers, we use the symbol U_STABLE.                                */
+/* This works properly if the includer is C or C++.                         */
+/* Functions are declared   U_STABLE return-type U_EXPORT2 function-name()... */
+/*==========================================================================*/
+
+/**
+ * \def U_CFUNC
+ * This is used in a declaration of a library private ICU C function.
+ * Expands to `extern "C"` when compiled as C++ and to plain `extern`
+ * otherwise, so the declared function always has C linkage.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_CDECL_BEGIN
+ * This is used to begin a declaration of a library private ICU C API.
+ * Expands to `extern "C" {` when compiled as C++ and to nothing in C.
+ * Every use must be paired with U_CDECL_END.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_CDECL_END
+ * This is used to end a declaration of a library private ICU C API.
+ * Expands to the closing `}` of the block opened by U_CDECL_BEGIN when
+ * compiled as C++ and to nothing in C.
+ * @stable ICU 2.4
+ */
+
+#ifdef __cplusplus
+#   define U_CFUNC extern "C"
+#   define U_CDECL_BEGIN extern "C" {
+#   define U_CDECL_END   }
+#else
+#   define U_CFUNC extern
+#   define U_CDECL_BEGIN
+#   define U_CDECL_END
+#endif
+
+#ifndef U_ATTRIBUTE_DEPRECATED
+/**
+ * \def U_ATTRIBUTE_DEPRECATED
+ * Marks a declaration as deprecated using a compiler-specific attribute.
+ * The whole section is skipped when the includer has already defined
+ * U_ATTRIBUTE_DEPRECATED.
+ *
+ * GCC branch: expands to `__attribute__ ((deprecated))` when
+ * U_GCC_MAJOR_MINOR >= 302. U_GCC_MAJOR_MINOR is not defined in this
+ * header; if it is still undefined here the preprocessor evaluates it as 0
+ * and this branch is not taken.
+ * @internal
+ */
+#if U_GCC_MAJOR_MINOR >= 302
+#    define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
+/**
+ * \def U_ATTRIBUTE_DEPRECATED
+ * Visual C++ branch: expands to `__declspec(deprecated)` when _MSC_VER is
+ * defined and at least 1400.
+ * In every other case the macro is defined empty, so declarations using it
+ * still compile but carry no deprecation marker.
+ * @internal
+ */
+#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
+#    define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
+#else
+#    define U_ATTRIBUTE_DEPRECATED
+#endif
+#endif
+
+/**
+ * This is used to declare a function as a public ICU C API.
+ * Expands to U_CFUNC followed by U_EXPORT. U_EXPORT is not defined anywhere
+ * in this header, so it has to be supplied by the includer (or by platform
+ * headers) before U_CAPI or any macro built on it is expanded.
+ * @stable ICU 2.0
+ */
+#define U_CAPI U_CFUNC U_EXPORT
+/** This is used to declare a function as a stable public ICU C API (alias of U_CAPI). */
+#define U_STABLE U_CAPI
+/** This is used to declare a function as a draft public ICU C API (alias of U_CAPI). */
+#define U_DRAFT  U_CAPI
+/** This is used to declare a function as a deprecated public ICU C API (U_CAPI plus U_ATTRIBUTE_DEPRECATED). */
+#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
+/** This is used to declare a function as an obsolete public ICU C API (alias of U_CAPI). */
+#define U_OBSOLETE U_CAPI
+/** This is used to declare a function as an internal ICU C API (alias of U_CAPI). */
+#define U_INTERNAL U_CAPI
+
+/**
+ * \def U_OVERRIDE
+ * Defined to the C++11 "override" keyword.
+ * Denotes a class or member which is an override of the base class.
+ * May result in an error if it is applied to something that is not an
+ * override.
+ * No compiler-capability check is made here: the macro is defined as
+ * `override` whenever the includer has not already defined U_OVERRIDE.
+ * @internal
+ */
+#ifndef U_OVERRIDE
+#define U_OVERRIDE override
+#endif
+
+/**
+ * \def U_FINAL
+ * Defined to the C++11 "final" keyword.
+ * Denotes a class or member which may not be overridden in subclasses.
+ * May result in an error if subclasses attempt to override.
+ * No compiler-capability check is made here: the macro is defined as
+ * `final` whenever U_FINAL is not already defined, and also whenever
+ * U_IN_DOXYGEN is defined.
+ * @internal
+ */
+#if !defined(U_FINAL) || defined(U_IN_DOXYGEN)
+#define U_FINAL final
+#endif
+
+// Before ICU 65, function-like, multi-statement ICU macros were just defined as
+// series of statements wrapped in { } blocks and the caller could choose to
+// either treat them as if they were actual functions and end the invocation
+// with a trailing ; creating an empty statement after the block or else omit
+// this trailing ; using the knowledge that the macro would expand to { }.
+//
+// But doing so doesn't work well with macros that look like functions and
+// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
+// switches to the standard solution of wrapping such macros in do { } while.
+//
+// This will however break existing code that depends on being able to invoke
+// these macros without a trailing ; so to be able to remain compatible with
+// such code the wrapper is itself defined as macros so that it's possible to
+// build ICU 65 and later with the old macro behaviour, like this:
+//
+// CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
+// runConfigureICU ...
+
+/**
+ * \def UPRV_BLOCK_MACRO_BEGIN
+ * Defined as the "do" keyword by default.
+ * Opens the `do { ... } while (FALSE)` wrapper used around multi-statement
+ * macros; can be overridden by defining it before this header is included.
+ * @internal
+ */
+#ifndef UPRV_BLOCK_MACRO_BEGIN
+#define UPRV_BLOCK_MACRO_BEGIN do
+#endif
+
+/**
+ * \def UPRV_BLOCK_MACRO_END
+ * Defined as "while (FALSE)" by default.
+ * Closes the wrapper opened by UPRV_BLOCK_MACRO_BEGIN. FALSE is defined
+ * further down in this header (unless the includer already defined it);
+ * that ordering is fine because the macro body is only expanded where the
+ * macro is used.
+ * @internal
+ */
+#ifndef UPRV_BLOCK_MACRO_END
+#define UPRV_BLOCK_MACRO_END while (FALSE)
+#endif
+
+/*==========================================================================*/
+/* limits for int32_t etc., like in POSIX inttypes.h                        */
+/*==========================================================================*/
+
+#ifndef INT8_MIN
+/**
+ * The smallest value an 8 bit signed integer can hold.
+ * Like every limit in this section, it is only defined here when no macro
+ * of the same name is already defined at this point.
+ * @stable ICU 2.0
+ */
+#   define INT8_MIN        ((int8_t)(-128))
+#endif
+#ifndef INT16_MIN
+/** The smallest value a 16 bit signed integer can hold (spelled as -32767-1) @stable ICU 2.0 */
+#   define INT16_MIN       ((int16_t)(-32767-1))
+#endif
+#ifndef INT32_MIN
+/** The smallest value a 32 bit signed integer can hold (spelled as -2147483647-1) @stable ICU 2.0 */
+#   define INT32_MIN       ((int32_t)(-2147483647-1))
+#endif
+
+#ifndef INT8_MAX
+/** The largest value an 8 bit signed integer can hold (127) @stable ICU 2.0 */
+#   define INT8_MAX        ((int8_t)(127))
+#endif
+#ifndef INT16_MAX
+/** The largest value a 16 bit signed integer can hold (32767) @stable ICU 2.0 */
+#   define INT16_MAX       ((int16_t)(32767))
+#endif
+#ifndef INT32_MAX
+/** The largest value a 32 bit signed integer can hold (2147483647) @stable ICU 2.0 */
+#   define INT32_MAX       ((int32_t)(2147483647))
+#endif
+
+#ifndef UINT8_MAX
+/** The largest value an 8 bit unsigned integer can hold (255) @stable ICU 2.0 */
+#   define UINT8_MAX       ((uint8_t)(255U))
+#endif
+#ifndef UINT16_MAX
+/** The largest value a 16 bit unsigned integer can hold (65535) @stable ICU 2.0 */
+#   define UINT16_MAX      ((uint16_t)(65535U))
+#endif
+#ifndef UINT32_MAX
+/** The largest value a 32 bit unsigned integer can hold (4294967295) @stable ICU 2.0 */
+#   define UINT32_MAX      ((uint32_t)(4294967295U))
+#endif
+
+#if defined(U_INT64_T_UNAVAILABLE)
+# error int64_t is required for decimal format and rule-based number format.
+#else
+# ifndef INT64_C
+/**
+ * Provides a platform independent way to specify a signed 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
+ * This fallback pastes the `LL` suffix onto the literal and is only used
+ * when INT64_C is not already defined. The whole section is compiled only
+ * when U_INT64_T_UNAVAILABLE is not defined; otherwise the #error above
+ * stops the build.
+ * @stable ICU 2.8
+ */
+#   define INT64_C(c) c ## LL
+# endif
+# ifndef UINT64_C
+/**
+ * Provides a platform independent way to specify an unsigned 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
+ * This fallback pastes the `ULL` suffix onto the literal and is only used
+ * when UINT64_C is not already defined.
+ * @stable ICU 2.8
+ */
+#   define UINT64_C(c) c ## ULL
+# endif
+# ifndef U_INT64_MIN
+/** The smallest value a 64 bit signed integer can hold, spelled as (-9223372036854775807 - 1) @stable ICU 2.8 */
+#     define U_INT64_MIN       ((int64_t)(INT64_C(-9223372036854775807)-1))
+# endif
+# ifndef U_INT64_MAX
+/** The largest value a 64 bit signed integer can hold (9223372036854775807) @stable ICU 2.8 */
+#     define U_INT64_MAX       ((int64_t)(INT64_C(9223372036854775807)))
+# endif
+# ifndef U_UINT64_MAX
+/** The largest value a 64 bit unsigned integer can hold (18446744073709551615) @stable ICU 2.8 */
+#     define U_UINT64_MAX      ((uint64_t)(UINT64_C(18446744073709551615)))
+# endif
+#endif
+
+/*==========================================================================*/
+/* Boolean data type                                                        */
+/*==========================================================================*/
+
+/**
+ * The ICU boolean type: a typedef of int8_t.
+ * int8_t must already be declared when this line is reached; this header
+ * itself only includes the stub ptypes.h and <stddef.h>.
+ * @stable ICU 2.0
+ */
+typedef int8_t UBool;
+
+#ifndef TRUE
+/** The TRUE value of a UBool (1); only defined here if TRUE is not already defined @stable ICU 2.0 */
+#   define TRUE  1
+#endif
+#ifndef FALSE
+/** The FALSE value of a UBool (0); only defined here if FALSE is not already defined @stable ICU 2.0 */
+#   define FALSE 0
+#endif
+
+
+/*==========================================================================*/
+/* Unicode data types                                                       */
+/*==========================================================================*/
+
+/* wchar_t-related definitions -------------------------------------------- */
+
+/*
+ * \def U_WCHAR_IS_UTF16
+ * Defined if wchar_t uses UTF-16.
+ *
+ * @stable ICU 2.0
+ */
+/*
+ * \def U_WCHAR_IS_UTF32
+ * Defined if wchar_t uses UTF-32.
+ *
+ * @stable ICU 2.0
+ *
+ * Detection below runs only when neither macro is already defined, and it
+ * may end with neither of them defined.
+ * The tests rely on U_SIZEOF_WCHAR_T, U_PLATFORM, the U_PF_* constants,
+ * U_PLATFORM_IS_DARWIN_BASED, U_PLATFORM_IS_LINUX_BASED and
+ * U_PLATFORM_HAS_WIN32_API, none of which is defined in this header.
+ * Any of them that is still undefined is evaluated as 0 by the
+ * preprocessor inside these #if/#elif expressions.
+ */
+#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
+#   ifdef __STDC_ISO_10646__
+#       if (U_SIZEOF_WCHAR_T==2)
+#           define U_WCHAR_IS_UTF16
+#       elif (U_SIZEOF_WCHAR_T==4)
+#           define  U_WCHAR_IS_UTF32
+#       endif
+#   elif defined __UCS2__
+#       if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
+#           define U_WCHAR_IS_UTF16
+#       endif
+#   elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
+#       if (U_SIZEOF_WCHAR_T==4)
+#           define U_WCHAR_IS_UTF32
+#       endif
+#   elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
+#       define U_WCHAR_IS_UTF32
+#   elif U_PLATFORM_HAS_WIN32_API
+#       define U_WCHAR_IS_UTF16
+#   endif
+#endif
+
+/* UChar and UChar32 definitions -------------------------------------------- */
+
+/** Number of bytes in a UChar (always 2). @stable ICU 2.0 */
+#define U_SIZEOF_UCHAR 2
+
+/**
+ * \def U_CHAR16_IS_TYPEDEF
+ * If 1, then char16_t is a typedef and not a real type (yet)
+ *
+ * Set to 1 in two cases: C++ older than C++11 on AIX (where <uchar.h> is
+ * also included), and MSVC with _MSC_VER below 1900. Set to 0 otherwise.
+ *
+ * NOTE(review): U_PLATFORM, U_PF_AIX and U_CPLUSPLUS_VERSION are not
+ * defined in this header. If a C++ translation unit includes it with all
+ * three still undefined, each evaluates as 0, so the first condition reads
+ * (0 == 0) && 1 && (0 < 11) and the AIX branch is taken. Confirm that
+ * includers are C-only or define these macros first.
+ * @internal
+ */
+#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
+// for AIX, uchar.h needs to be included
+# include <uchar.h>
+# define U_CHAR16_IS_TYPEDEF 1
+#elif defined(_MSC_VER) && (_MSC_VER < 1900)
+// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
+// and instead use a typedef.  https://msdn.microsoft.com/library/bb531344.aspx
+# define U_CHAR16_IS_TYPEDEF 1
+#else
+# define U_CHAR16_IS_TYPEDEF 0
+#endif
+
+
+/**
+ * \var UChar
+ *
+ * The base type for UTF-16 code units and pointers.
+ * Unsigned 16-bit integer.
+ * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
+ *
+ * UChar is configurable by defining the macro UCHAR_TYPE
+ * on the preprocessor or compiler command line:
+ * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
+ * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
+ * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
+ *
+ * The default is UChar=char16_t in C++ and UChar=uint16_t in C.
+ * The typedef chain below picks, in order: char16_t inside the ICU library
+ * code (any of the U_*_IMPLEMENTATION macros defined), UCHAR_TYPE if it is
+ * defined, char16_t when compiled as C++, and uint16_t otherwise.
+ *
+ * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
+ *
+ * In C, char16_t is a simple typedef of uint_least16_t.
+ * ICU requires uint_least16_t=uint16_t for data memory mapping.
+ * On macOS, char16_t is not available because the uchar.h standard header is missing.
+ *
+ * @stable ICU 4.4
+ */
+
+#if 1
+    // #if 1 is normal. UChar defaults to char16_t in C++.
+    // For configuration testing of UChar=uint16_t temporarily change this to #if 0.
+    // The intltest Makefile #defines UCHAR_TYPE=char16_t,
+    // so we only #define it to uint16_t if it is undefined so far.
+#elif !defined(UCHAR_TYPE)
+#   define UCHAR_TYPE uint16_t
+#endif
+
+#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
+        defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
+    // Inside the ICU library code, never configurable.
+    typedef char16_t UChar;
+#elif defined(UCHAR_TYPE)
+    typedef UCHAR_TYPE UChar;
+#elif defined(__cplusplus)
+    typedef char16_t UChar;
+#else
+    typedef uint16_t UChar;
+#endif
+
+/**
+ * \var OldUChar
+ * Default ICU 58 definition of UChar.
+ * A base type for UTF-16 code units and pointers.
+ * Unsigned 16-bit integer.
+ *
+ * Define OldUChar to be wchar_t if that is 16 bits wide
+ * (U_SIZEOF_WCHAR_T==2).
+ * If wchar_t is not 16 bits wide, then define OldUChar to be
+ * __CHAR16_TYPE__ when the compiler predefines that macro, or else
+ * uint16_t.
+ *
+ * This makes the definition of OldUChar platform-dependent
+ * but allows direct string type compatibility with platforms with
+ * 16-bit wchar_t types.
+ *
+ * This is how UChar was defined in ICU 58, for transition convenience.
+ * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
+ * The current UChar responds to UCHAR_TYPE but OldUChar does not.
+ *
+ * @stable ICU 59
+ */
+#if U_SIZEOF_WCHAR_T==2
+    typedef wchar_t OldUChar;
+#elif defined(__CHAR16_TYPE__)
+    typedef __CHAR16_TYPE__ OldUChar;
+#else
+    typedef uint16_t OldUChar;
+#endif
+
+/**
+ * Define UChar32 as a type for single Unicode code points.
+ * UChar32 is a signed 32-bit integer (same as int32_t).
+ *
+ * The Unicode code point range is 0..0x10ffff.
+ * All other values (negative or >=0x110000) are illegal as Unicode code points.
+ * They may be used as sentinel values to indicate "done", "error"
+ * or similar non-code point conditions.
+ *
+ * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
+ * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
+ * or else to be uint32_t.
+ * That is, the definition of UChar32 was platform-dependent.
+ *
+ * int32_t must already be declared when this typedef is reached; this
+ * header does not include a header that is guaranteed to declare it.
+ *
+ * @see U_SENTINEL
+ * @stable ICU 2.4
+ */
+typedef int32_t UChar32;
+
+/**
+ * This value is intended for sentinel values for APIs that
+ * (take or) return single code points (UChar32).
+ * It is outside of the Unicode code point range 0..0x10ffff.
+ *
+ * For example, a "done" or "error" value in a new API
+ * could be indicated with U_SENTINEL.
+ *
+ * ICU APIs designed before ICU 2.4 usually define service-specific "done"
+ * values, mostly 0xffff.
+ * Those may need to be distinguished from
+ * actual U+ffff text contents by calling functions like
+ * CharacterIterator::hasNext() or UnicodeString::length().
+ *
+ * The macro expands to the parenthesized constant (-1), so it can be used
+ * inside larger expressions without extra parentheses.
+ *
+ * @return -1
+ * @see UChar32
+ * @stable ICU 2.4
+ */
+#define U_SENTINEL (-1)
+
+#include "unicode/urename.h"
+
+#endif
--- a/shlr/tree-sitter/lib/src/unicode/urename.h
+++ b/shlr/tree-sitter/lib/src/unicode/urename.h
@ -0,0 +1 @@
+// This file must exist in order for `utf8.h` and `utf16.h` to be used.
--- a/shlr/tree-sitter/lib/src/unicode/utf.h
+++ b/shlr/tree-sitter/lib/src/unicode/utf.h
@ -0,0 +1 @@
+// This file must exist in order for `utf8.h` and `utf16.h` to be used.
--- a/shlr/tree-sitter/lib/src/unicode/utf16.h
+++ b/shlr/tree-sitter/lib/src/unicode/utf16.h
@ -0,0 +1,733 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utf16.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep09
+*   created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: 16-bit Unicode handling macros
+ * 
+ * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (http://userguide.icu-project.org/strings).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+#ifndef __UTF16_H__
+#define __UTF16_H__
+
+#include "unicode/umachine.h"
+#ifndef __UTF_H__
+#   include "unicode/utf.h"
+#endif
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ *
+ * Implemented directly with a bit mask rather than as !U_IS_SURROGATE(c):
+ * the "unicode/utf.h" bundled with this header is an empty placeholder and
+ * does not define U_IS_SURROGATE, so the forwarding form could not expand.
+ * The mask clears the low 11 bits; a surrogate leaves exactly 0xd800.
+ * The replacement list is fully parenthesized so that it behaves as a single
+ * expression wherever the macro is used.
+ *
+ * @param c 16-bit code unit; evaluated exactly once
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SINGLE(c) (((c)&0xfffff800)!=0xd800)
+
+/**
+ * Is this code unit a lead surrogate (U+d800..U+dbff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) /* drops the low 10 bits; every bit above them (including bits 16..31) must match 0xd800 */
+
+/**
+ * Is this code unit a trail surrogate (U+dc00..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) /* drops the low 10 bits; every bit above them (including bits 16..31) must match 0xdc00 */
+
+/**
+ * Is this code unit a surrogate (U+d800..U+dfff)?
+ *
+ * Tested directly with a bit mask instead of forwarding to U_IS_SURROGATE():
+ * the "unicode/utf.h" bundled with this header is an empty placeholder and
+ * does not define that macro, so the forwarding form could not be expanded
+ * (which in turn broke every macro in this file that uses U16_IS_SURROGATE).
+ * The mask clears the low 11 bits; all remaining bits must equal 0xd800.
+ *
+ * @param c 16-bit code unit; evaluated exactly once
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
+
+/**
+ * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) /* bit 10 is clear in 0xd800..0xdbff and set in 0xdc00..0xdfff; only meaningful for surrogates */
+
+/**
+ * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
+ * is it a trail surrogate?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 4.2
+ */
+#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) /* bit 10 is set in 0xdc00..0xdfff and clear in 0xd800..0xdbff; only meaningful for surrogates */
+
+/**
+ * Helper constant for U16_GET_SUPPLEMENTARY.
+ * @internal
+ */
+#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) /* lead base shifted into place, plus trail base, minus the 0x10000 supplementary offset: subtracting this once undoes all three */
+
+/**
+ * Get a supplementary code point value (U+10000..U+10ffff)
+ * from its lead and trail surrogates.
+ * The result is undefined if the input values are not
+ * lead and trail surrogates.
+ *
+ * @param lead lead surrogate (U+d800..U+dbff)
+ * @param trail trail surrogate (U+dc00..U+dfff)
+ * @return supplementary code point (U+10000..U+10ffff)
+ * @stable ICU 2.4
+ */
+#define U16_GET_SUPPLEMENTARY(lead, trail) /* each argument is evaluated exactly once; no validation of either unit */ \
+    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
+
+
+/**
+ * Get the lead surrogate (0xd800..0xdbff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * 0xd7c0 equals 0xd800-(0x10000>>10), so one shift and one addition both
+ * remove the 0x10000 offset and add the lead surrogate base.
+ * The whole replacement list is parenthesized so that the expansion behaves
+ * as a single expression at the use site (for example under sizeof or when
+ * followed by a postfix operator).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff); evaluated once
+ * @return lead surrogate (U+d800..U+dbff) for supplementary
+ * @stable ICU 2.4
+ */
+#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
+
+/**
+ * Get the trail surrogate (0xdc00..0xdfff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * The low 10 bits of the code point are kept and the trail surrogate base
+ * 0xdc00 is OR-ed in.
+ * The whole replacement list is parenthesized so that the expansion behaves
+ * as a single expression at the use site (for example under sizeof or when
+ * followed by a postfix operator).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff); evaluated once
+ * @return trail surrogate (U+dc00..U+dfff) for supplementary
+ * @stable ICU 2.4
+ */
+#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
+
+/**
+ * Number of 16-bit code units needed to encode a Unicode code point (1 or 2).
+ * Code points up to U+ffff take one unit; anything above takes two.
+ * The argument is compared as an unsigned 32-bit value.
+ * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
+ * @param c 32-bit code point; evaluated once
+ * @return 1 or 2
+ * @stable ICU 2.4
+ */
+#define U16_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)
+
+/**
+ * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
+ * @return 2
+ * @stable ICU 2.4
+ */
+#define U16_MAX_LENGTH 2 /* a surrogate pair: one lead unit plus one trail unit */
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ * The result is undefined if the offset points to a single, unpaired surrogate.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_GET
+ * @stable ICU 2.4
+ */
+#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { /* i is read but never modified; no bounds checks */ \
+    (c)=(s)[i]; \
+    if(U16_IS_SURROGATE(c)) { \
+        if(U16_IS_SURROGATE_LEAD(c)) { /* i is on the lead unit: combine with the unit after it */ \
+            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
+        } else { /* i is on the trail unit: combine with the unit before it */ \
+            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
+        } \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to a single, unpaired surrogate, then
+ * c is set to that unpaired surrogate.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_GET_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { /* i is read but never modified */ \
+    (c)=(s)[i]; \
+    if(U16_IS_SURROGATE(c)) { \
+        uint16_t __c2; /* the neighbouring code unit, assigned inside the tests below */ \
+        if(U16_IS_SURROGATE_LEAD(c)) { \
+            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { /* s[i+1] is read only when i+1 is not the end; a negative length never matches a non-negative i+1 */ \
+                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+            } /* otherwise c keeps the unpaired lead surrogate */ \
+        } else { \
+            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { /* s[i-1] is read only when it lies at or after start */ \
+                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+            } /* otherwise c keeps the unpaired trail surrogate */ \
+        } \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to a single, unpaired surrogate, then
+ * c is set to U+FFFD.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_GET_UNSAFE
+ * @stable ICU 60
+ */
+#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { /* i is read but never modified */ \
+    (c)=(s)[i]; \
+    if(U16_IS_SURROGATE(c)) { \
+        uint16_t __c2; /* the neighbouring code unit, assigned inside the tests below */ \
+        if(U16_IS_SURROGATE_LEAD(c)) { \
+            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { /* s[i+1] is read only when i+1 is not the end */ \
+                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+            } else { /* lead at end of string or not followed by a trail */ \
+                (c)=0xfffd; \
+            } \
+        } else { \
+            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { /* s[i-1] is read only when it lies at or after start */ \
+                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+            } else { /* trail at start of string or not preceded by a lead */ \
+                (c)=0xfffd; \
+            } \
+        } \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with forward iteration --------------------------------------- */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset points to a single, unpaired lead surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_NEXT
+ * @stable ICU 2.4
+ */
+#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+    (c)=(s)[(i)++]; \
+    if(U16_IS_LEAD(c)) { \
+        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); /* consumes the next unit without checking that it is a trail surrogate */ \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+    (c)=(s)[(i)++]; /* i now points at the unit after the one just read */ \
+    if(U16_IS_LEAD(c)) { \
+        uint16_t __c2; /* candidate trail unit, assigned inside the test below */ \
+        if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { /* s[i] is read only when i is not the end */ \
+            ++(i); /* the trail unit belongs to this code point, so skip it too */ \
+            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+        } /* otherwise c keeps the unpaired lead surrogate */ \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then c is set to U+FFFD.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_NEXT_UNSAFE
+ * @stable ICU 60
+ */
+#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+    (c)=(s)[(i)++]; /* i now points at the unit after the one just read */ \
+    if(U16_IS_SURROGATE(c)) { \
+        uint16_t __c2; /* candidate trail unit, assigned inside the test below */ \
+        if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { /* s[i] is read only for a lead that is not at the end */ \
+            ++(i); /* the trail unit belongs to this code point, so skip it too */ \
+            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+        } else { /* trail surrogate first, lead at the end, or lead without a trail */ \
+            (c)=0xfffd; \
+        } \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s UChar * string buffer (written to, so it must not be const)
+ * @param i string offset
+ * @param c code point to append
+ * @see U16_APPEND
+ * @stable ICU 2.4
+ */
+#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { /* c is evaluated more than once */ \
+    if((uint32_t)(c)<=0xffff) { /* one code unit; surrogate values in this range are stored unchanged */ \
+        (s)[(i)++]=(uint16_t)(c); \
+    } else { /* two code units; c is not checked against 0x10ffff */ \
+        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); /* lead surrogate */ \
+        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); /* trail surrogate */ \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a surrogate pair is written, checks for sufficient space in the string.
+ * If the code point is not valid or a trail surrogate does not fit,
+ * then isError is set to TRUE.
+ *
+ * @param s UChar * string buffer (written to, so it must not be const)
+ * @param i string offset, must be i<capacity
+ * @param capacity size of the string buffer
+ * @param c code point to append
+ * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
+ * @see U16_APPEND_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { /* c is evaluated more than once */ \
+    if((uint32_t)(c)<=0xffff) { /* one code unit; capacity is not consulted here and surrogate values are stored unchanged */ \
+        (s)[(i)++]=(uint16_t)(c); \
+    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { /* two code units need both i and i+1 inside the buffer */ \
+        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); /* lead surrogate */ \
+        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); /* trail surrogate */ \
+    } else /* c>0x10ffff or not enough space */ { \
+        (isError)=TRUE; /* nothing is written and i is left unchanged */ \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_FWD_1
+ * @stable ICU 2.4
+ */
+#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+    if(U16_IS_LEAD((s)[(i)++])) { /* i is always advanced by one here, whatever the unit is */ \
+        ++(i); /* skip the following unit too, without checking that it is a trail surrogate */ \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @see U16_FWD_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+    if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { /* i is always advanced by one; s[i] is read only when i is not the end */ \
+        ++(i); /* the trail unit completes the pair, so skip it as well */ \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Skip forward over n code points, starting from a code point boundary,
+ * so that the offset ends up on the n-th following boundary.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16 and performs no bounds checks.
+ *
+ * @param s const UChar * string
+ * @param i string offset, advanced in place
+ * @param n number of code points to skip; evaluated once, nothing is skipped if n<=0
+ * @see U16_FWD_N
+ * @stable ICU 2.4
+ */
+#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+    int32_t __N; \
+    for(__N=(n); __N>0; --__N) { \
+        U16_FWD_1_UNSAFE(s, i); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Skip forward over up to n code points, starting from a code point boundary.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries:
+ * skipping stops early once the offset reaches length or, for a negative
+ * length, once the offset is on a NUL code unit.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string
+ * @param i int32_t string offset, must be i<length; advanced in place
+ * @param length int32_t string length
+ * @param n number of code points to skip; evaluated once, nothing is skipped if n<=0
+ * @see U16_FWD_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
+    int32_t __N; \
+    for(__N=(n); __N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); --__N) { \
+        U16_FWD_1(s, i, length); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_SET_CP_START
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+    if(U16_IS_TRAIL((s)[i])) { /* the preceding unit is assumed to be the matching lead; it is not inspected */ \
+        --(i); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i
+ * @see U16_SET_CP_START_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+    if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { /* s[i-1] is read only when it lies at or after start; an unpaired trail leaves i unchanged */ \
+        --(i); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with backward iteration -------------------------------------- */
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind a single, unpaired trail surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_PREV
+ * @stable ICU 2.4
+ */
+#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+    (c)=(s)[--(i)]; /* i now points at the unit just read */ \
+    if(U16_IS_TRAIL(c)) { \
+        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); /* steps back once more without checking that the unit is a lead surrogate */ \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate or behind a single, unpaired
+ * trail surrogate, then c is set to that unpaired surrogate.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @param c output UChar32 variable
+ * @see U16_PREV_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+    (c)=(s)[--(i)]; /* i now points at the unit just read */ \
+    if(U16_IS_TRAIL(c)) { \
+        uint16_t __c2; /* candidate lead unit, assigned inside the test below */ \
+        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { /* s[i-1] is read only when it lies at or after start */ \
+            --(i); /* the lead unit belongs to this code point, so step over it too */ \
+            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+        } /* otherwise c keeps the unpaired trail surrogate */ \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate or behind a single, unpaired
+ * trail surrogate, then c is set to U+FFFD.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @param c output UChar32 variable
+ * @see U16_PREV_UNSAFE
+ * @stable ICU 60
+ */
+#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+    (c)=(s)[--(i)]; /* i now points at the unit just read */ \
+    if(U16_IS_SURROGATE(c)) { \
+        uint16_t __c2; /* candidate lead unit, assigned inside the test below */ \
+        if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { /* s[i-1] is read only for a trail that is not at start */ \
+            --(i); /* the lead unit belongs to this code point, so step over it too */ \
+            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+        } else { /* lead surrogate last, trail at start, or trail without a lead */ \
+            (c)=0xfffd; \
+        } \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_BACK_1
+ * @stable ICU 2.4
+ */
+#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+    if(U16_IS_TRAIL((s)[--(i)])) { /* i is always decremented by one here, whatever the unit is */ \
+        --(i); /* step over the preceding unit too, without checking that it is a lead surrogate */ \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @see U16_BACK_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { /* i is always decremented by one; s[i-1] is read only when it lies at or after start */ \
+        --(i); /* the lead unit completes the pair, so step over it as well */ \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Step backward over n code points, starting from a code point boundary,
+ * so that the offset ends up on the n-th preceding boundary.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16 and performs no bounds checks.
+ *
+ * @param s const UChar * string
+ * @param i string offset, moved back in place
+ * @param n number of code points to skip; evaluated once, nothing is skipped if n<=0
+ * @see U16_BACK_N
+ * @stable ICU 2.4
+ */
+#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+    int32_t __N; \
+    for(__N=(n); __N>0; --__N) { \
+        U16_BACK_1_UNSAFE(s, i); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Step backward over up to n code points, starting from a code point boundary.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries:
+ * stepping stops early once the offset is no longer greater than start.
+ *
+ * @param s const UChar * string
+ * @param start start of string
+ * @param i string offset, must be start<i; moved back in place
+ * @param n number of code points to skip; evaluated once, nothing is skipped if n<=0
+ * @see U16_BACK_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+    int32_t __N; \
+    for(__N=(n); __N>0 && (i)>(start); --__N) { \
+        U16_BACK_1(s, start, i); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_SET_CP_LIMIT
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+    if(U16_IS_LEAD((s)[(i)-1])) { /* reads s[i-1] unconditionally; the unit at i is assumed to be the matching trail and is not inspected */ \
+        ++(i); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, start<=i<=length
+ * @param length int32_t string length
+ * @see U16_SET_CP_LIMIT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+    if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { /* s[i-1] and s[i] are read only when i is strictly after start and before the end (or length is negative) */ \
+        ++(i); /* i was between the two halves of a pair: move past the trail unit */ \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+#endif
--- a/shlr/tree-sitter/lib/src/unicode/utf8.h
+++ b/shlr/tree-sitter/lib/src/unicode/utf8.h
@ -0,0 +1,881 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2015, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utf8.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep13
+*   created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: 8-bit Unicode handling macros
+ *
+ * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (http://userguide.icu-project.org/strings).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+#ifndef __UTF8_H__
+#define __UTF8_H__
+
+#include "unicode/umachine.h"
+#ifndef __UTF_H__
+#   include "unicode/utf.h"
+#endif
+
+/* internal definitions ----------------------------------------------------- */
+
+/**
+ * Counts the trail bytes for a UTF-8 lead byte.
+ * Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
+ * leadByte might be evaluated multiple times.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @internal
+ */
+#define U8_COUNT_TRAIL_BYTES(leadByte) \
+    /* non-lead bytes have no trail bytes; leads start at 1, +1 from 0xe0, +1 from 0xf0 */ \
+    (!U8_IS_LEAD(leadByte) ? 0 : \
+        1+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
+
+/**
+ * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
+ * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
+ * leadByte might be evaluated multiple times.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @internal
+ */
+#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
+    /* one threshold per trail byte: 0xc2 -> 1, 0xe0 -> 2, 0xf0 -> 3 */ \
+    (((uint8_t)(leadByte)>=0xf0)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xc2))
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ * The mask keeps the low (6-countTrailBytes) bits: 0x1f for 1 trail byte,
+ * 0x0f for 2 trail bytes and 0x07 for 3 trail bytes.
+ * leadByte is modified in place (compound assignment), so it must be an lvalue;
+ * the value of the expression is the masked byte.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ * @internal
+ */
+#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+
+/**
+ * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
+ * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
+ * Lead byte E0..EF bits 3..0 are used as byte index,
+ * first trail byte bits 7..5 are used as bit index into that byte.
+ *
+ * Entry values: 0x30 (bits 4 and 5) accepts first trail bytes 80..BF.
+ * Index 0 (lead E0) holds 0x20 and accepts only A0..BF, which rejects overlong forms.
+ * Index 0xD (lead ED) holds 0x10 and accepts only 80..9F, which rejects surrogates.
+ * Bits 0..3 are never set, so a first "trail" byte below 0x80 is always rejected.
+ * @see U8_IS_VALID_LEAD3_AND_T1
+ * @internal
+ */
+#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
+
+/**
+ * Internal 3-byte UTF-8 validity check.
+ * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
+ * Only the low 4 bits of lead are used, so the caller must already know that
+ * lead is in E0..EF. The result is the selected table bit, not normalized to 0/1.
+ * @internal
+ */
+#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))
+
+/**
+ * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
+ * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
+ * First trail byte bits 7..4 are used as byte index,
+ * lead byte F0..F4 bits 2..0 are used as bit index into that byte.
+ *
+ * Entry values: index 8 (first trail byte 80..8F) holds 0x1E and accepts leads F1..F4.
+ * Indexes 9..B (first trail byte 90..BF) hold 0x0F and accept leads F0..F3.
+ * So F0 needs 90..BF (no overlong forms) and F4 needs 80..8F (nothing above U+10FFFF).
+ * All other indexes are 0 (not a trail byte), and bits 5..7 are never set (F5..F7 rejected).
+ * @see U8_IS_VALID_LEAD4_AND_T1
+ * @internal
+ */
+#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
+
+/**
+ * Internal 4-byte UTF-8 validity check.
+ * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
+ * Only the low 3 bits of lead are used, so the caller must already know that
+ * lead is F0 or above. The result is the selected table bit, not normalized to 0/1.
+ * @internal
+ */
+#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))
+
+/**
+ * Function for handling "next code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ *
+ * NOTE(review): no macro in this header references this function; U8_NEXT and
+ * U8_NEXT_OR_FFFD expand to the inline U8_INTERNAL_NEXT_OR_SUB instead.
+ * Parameter semantics are defined by the implementation, which is not in this header.
+ * @internal
+ */
+U_STABLE UChar32 U_EXPORT2
+utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
+
+/**
+ * Function for handling "append code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ *
+ * NOTE(review): no macro in this header references this function; U8_APPEND
+ * writes its bytes inline. Parameter semantics are defined by the implementation,
+ * which is not in this header.
+ * @internal
+ */
+U_STABLE int32_t U_EXPORT2
+utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
+
+/**
+ * Function for handling "previous code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ *
+ * Called by U8_PREV (strict=-1) and U8_PREV_OR_FFFD (strict=-3) after they have
+ * decremented the offset and read a non-ASCII byte into c; they pass the address
+ * of the offset as pi and store the return value in c.
+ * NOTE(review): the -1/-3 values presumably select a negative result vs. U+FFFD
+ * for ill-formed input - confirm against the implementation.
+ * @internal
+ */
+U_STABLE UChar32 U_EXPORT2
+utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
+
+/**
+ * Function for handling "skip backward one code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ *
+ * Called by U8_SET_CP_START and U8_BACK_1 only when the byte at offset i is a
+ * trail byte; both macros assign the return value back to their offset.
+ * @internal
+ */
+U_STABLE int32_t U_EXPORT2
+utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_SINGLE(c) (0==((c)&0x80))
+// bit 7 clear <=> 0x00..0x7f
+
+/**
+ * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32)
+// 0x32=0xf4-0xc2
+// Bytes below 0xc2 wrap around to large values in the uint8_t cast,
+// so this single unsigned comparison checks both ends of the range.
+
+/**
+ * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40)
+// As int8_t, 0x80..0xbf are -128..-65 (below -0x40),
+// while 0xc0..0xff are -64..-1 and ASCII bytes are >=0.
+
+/**
+ * How many code units (bytes) are used for the UTF-8 encoding
+ * of this Unicode code point?
+ * @param c 32-bit code point
+ * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
+ * @stable ICU 2.4
+ */
+#define U8_LENGTH(c) \
+    /* ranges are tested in ascending order; 0 marks surrogates and values above U+10FFFF */ \
+    ((uint32_t)(c)<=0x7f ? 1 : \
+     (uint32_t)(c)<=0x7ff ? 2 : \
+     (uint32_t)(c)<=0xd7ff ? 3 : \
+     ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff) ? 0 : \
+     (uint32_t)(c)<=0xffff ? 3 : 4)
+
+/**
+ * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
+ * This matches the largest value produced by U8_LENGTH and the longest
+ * sequence written by U8_APPEND.
+ * @return 4
+ * @stable ICU 2.4
+ */
+#define U8_MAX_LENGTH 4
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ * The result is undefined if the offset points to an illegal UTF-8
+ * byte sequence.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_GET
+ * @stable ICU 2.4
+ */
+#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+    /* work on a private copy so the caller's offset i is left unchanged */ \
+    int32_t _u8_get_unsafe_index=(int32_t)(i); \
+    /* back up to the lead byte if i points at a trail byte, then decode forward */ \
+    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
+    U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to an illegal UTF-8 byte sequence, then
+ * c is set to a negative value.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset
+ * @param i int32_t string offset, must be start<=i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_GET_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+    /* work on a private copy so the caller's offset i is left unchanged */ \
+    int32_t _u8_get_index=(i); \
+    /* back up to the start of the code point (bounded by start), then decode */ \
+    /* forward with validation; c receives the U8_NEXT error value on failure */ \
+    U8_SET_CP_START(s, start, _u8_get_index); \
+    U8_NEXT(s, _u8_get_index, length, c); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to an illegal UTF-8 byte sequence, then
+ * c is set to U+FFFD.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_GET() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset
+ * @param i int32_t string offset, must be start<=i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_GET
+ * @stable ICU 51
+ */
+#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+    /* work on a private copy so the caller's offset i is left unchanged */ \
+    int32_t _u8_get_index=(i); \
+    /* same steps as U8_GET, but decoding goes through U8_NEXT_OR_FFFD */ \
+    U8_SET_CP_START(s, start, _u8_get_index); \
+    U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with forward iteration --------------------------------------- */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * The result is undefined if the offset points to a trail byte
+ * or an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_NEXT
+ * @stable ICU 2.4
+ */
+#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+    /* read the single or lead byte and step past it */ \
+    (c)=(uint8_t)(s)[(i)++]; \
+    if(!U8_IS_SINGLE(c)) { \
+        if((c)<0xe0) { \
+            /* 2-byte sequence: 5 payload bits from the lead, 6 from the trail byte */ \
+            (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
+        } else if((c)<0xf0) { \
+            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
+            (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
+            (i)+=2; \
+        } else { \
+            /* 4-byte sequence: 3 payload bits from the lead, 6 from each of three trail bytes */ \
+            (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
+            (i)+=3; \
+        } \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
+// U_SENTINEL is the substitute value stored in c for ill-formed input.
+// U8_INTERNAL_NEXT_OR_SUB is defined further down; macros are expanded at the point of use.
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to U+FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_NEXT() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_NEXT
+ * @stable ICU 51
+ */
+#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
+// 0xfffd is the substitute value stored in c for ill-formed input.
+// U8_INTERNAL_NEXT_OR_SUB is defined further down; macros are expanded at the point of use.
+
+/**
+ * Shared implementation of U8_NEXT and U8_NEXT_OR_FFFD.
+ * Reads the byte at i into c and post-increments i. For a non-ASCII byte,
+ * a single condition validates the lead byte and each trail byte in turn,
+ * assembling the code point in c as it goes; i is advanced past every byte
+ * that was accepted. If any check fails (including reaching length before
+ * the sequence is complete), c is set to sub and i stays behind the bytes
+ * accepted so far.
+ *
+ * The end-of-string checks compare with !=, so a negative length never matches;
+ * for NUL-terminated input the NUL byte fails the trail-byte checks instead.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to sub in case of an error
+ * @param sub value stored in c for ill-formed input
+ * @internal
+ */
+#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \
+    (c)=(uint8_t)(s)[(i)++]; \
+    if(!U8_IS_SINGLE(c)) { \
+        uint8_t __t = 0; \
+        if((i)!=(length) && \
+            /* fetch/validate/assemble all but last trail byte */ \
+            ((c)>=0xe0 ? \
+                ((c)<0xf0 ?  /* U+0800..U+FFFF except surrogates */ \
+                    U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
+                    (__t&=0x3f, 1) \
+                :  /* U+10000..U+10FFFF */ \
+                    ((c)-=0xf0)<=4 && \
+                    U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
+                    ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
+                    (__t=(s)[i]-0x80)<=0x3f) && \
+                /* valid second-to-last trail byte */ \
+                ((c)=((c)<<6)|__t, ++(i)!=(length)) \
+            :  /* U+0080..U+07FF */ \
+                (c)>=0xc2 && ((c)&=0x1f, 1)) && \
+            /* last trail byte */ \
+            (__t=(s)[i]-0x80)<=0x3f && \
+            ((c)=((c)<<6)|__t, ++(i), 1)) { \
+        } else { \
+            (c)=(sub);  /* ill-formed*/ \
+        } \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s uint8_t * string buffer (written to, so not const)
+ * @param i string offset
+ * @param c code point to append
+ * @see U8_APPEND
+ * @stable ICU 2.4
+ */
+#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+    /* c is evaluated once; each branch emits one complete sequence */ \
+    uint32_t __uc=(c); \
+    if(__uc<=0x7f) { \
+        /* U+0000..U+007F: single byte */ \
+        (s)[(i)++]=(uint8_t)__uc; \
+    } else if(__uc<=0x7ff) { \
+        /* U+0080..U+07FF: lead byte plus one trail byte */ \
+        (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
+        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+    } else if(__uc<=0xffff) { \
+        /* U+0800..U+FFFF: lead byte plus two trail bytes */ \
+        (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
+        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+    } else { \
+        /* everything above U+FFFF: lead byte plus three trail bytes */ \
+        (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
+        (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a non-ASCII code point is written, checks for sufficient space in the string.
+ * If the code point is not valid or trail bytes do not fit,
+ * then isError is set to TRUE.
+ *
+ * @param s uint8_t * string buffer (written to, so not const)
+ * @param i int32_t string offset, must be i<capacity
+ * @param capacity int32_t size of the string buffer
+ * @param c UChar32 code point to append
+ * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
+ * @see U8_APPEND_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
+    uint32_t __uc=(c); \
+    if(__uc<=0x7f) { \
+        /* ASCII: written without a capacity check (the contract requires i<capacity) */ \
+        (s)[(i)++]=(uint8_t)__uc; \
+    } else if(__uc<=0x7ff && (i)+1<(capacity)) { \
+        (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
+        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+    } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \
+        /* the range test skips the surrogates U+D800..U+DFFF */ \
+        (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
+        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+    } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \
+        (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
+        (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+    } else { \
+        /* surrogate, value above U+10FFFF, or not enough room: nothing is */ \
+        /* written and i is unchanged; isError is only ever set, never cleared */ \
+        (isError)=TRUE; \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_FWD_1
+ * @stable ICU 2.4
+ */
+#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+    /* the byte at i alone determines the step; trail bytes are not inspected */ \
+    (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @see U8_FWD_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+    /* always consume one byte; trail bytes are only consumed while they are valid */ \
+    uint8_t __b=(s)[(i)++]; \
+    if(U8_IS_LEAD(__b) && (i)!=(length)) { \
+        uint8_t __t1=(s)[i]; \
+        if((0xe0<=__b && __b<0xf0)) { \
+            /* 3-byte lead: the first trail byte is checked against the lead */ \
+            if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
+                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
+                ++(i); \
+            } \
+        } else if(__b<0xe0) { \
+            /* 2-byte lead: any trail byte completes the sequence */ \
+            if(U8_IS_TRAIL(__t1)) { \
+                ++(i); \
+            } \
+        } else /* __b>=0xf0 */ { \
+            if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
+                    ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
+                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
+                ++(i); \
+            } \
+        } \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_FWD_N
+ * @stable ICU 2.4
+ */
+#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+    /* n is evaluated once; each pass skips exactly one code point */ \
+    int32_t __N; \
+    for(__N=(n); __N>0; --__N) { \
+        U8_FWD_1_UNSAFE(s, i); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param n number of code points to skip
+ * @see U8_FWD_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
+    /* stops early at the end of the string: i reaches length, or the NUL */ \
+    /* terminator is reached when length is negative */ \
+    int32_t __N; \
+    for(__N=(n); __N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); --__N) { \
+        U8_FWD_1(s, i, length); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_START
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+    /* walk left over trail bytes; stops on the first non-trail byte */ \
+    for(; U8_IS_TRAIL((s)[i]); --(i)) {} \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ *
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<=i
+ * @see U8_SET_CP_START_UNSAFE
+ * @see U8_TRUNCATE_IF_INCOMPLETE
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+    /* s[i] is always read; only a trail byte needs the out-of-line helper, */ \
+    /* whose return value becomes the new offset */ \
+    if(U8_IS_TRAIL((s)[(i)])) { \
+        (i)=utf8_back1SafeBody(s, start, (i)); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * If the string ends with a UTF-8 byte sequence that is valid so far
+ * but incomplete, then reduce the length of the string to end before
+ * the lead byte of that incomplete sequence.
+ * For example, if the string ends with E1 80, the length is reduced by 2.
+ *
+ * In all other cases (the string ends with a complete sequence, or it is not
+ * possible for any further trail byte to extend the trailing sequence)
+ * the length remains unchanged.
+ *
+ * Useful for processing text split across multiple buffers
+ * (save the incomplete sequence for later)
+ * and for optimizing iteration
+ * (check for string length only once per character).
+ *
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ * Unlike U8_SET_CP_START(), this macro never reads s[length].
+ *
+ * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param length int32_t string length (usually start<=length)
+ * @see U8_SET_CP_START
+ * @stable ICU 61
+ */
+#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
+    if((length)>(start)) { \
+        /* s is parenthesized in every subscript so that pointer expressions */ \
+        /* such as buf+offset index the intended element */ \
+        uint8_t __b1=(s)[(length)-1]; \
+        if(U8_IS_SINGLE(__b1)) { \
+            /* common ASCII character */ \
+        } else if(U8_IS_LEAD(__b1)) { \
+            /* a lead byte with no trail bytes yet: drop it */ \
+            --(length); \
+        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
+            uint8_t __b2=(s)[(length)-2]; \
+            if(0xe0<=__b2 && __b2<=0xf4) { \
+                /* 3- or 4-byte lead plus its first trail byte: drop both if the pair is valid */ \
+                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
+                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
+                    (length)-=2; \
+                } \
+            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
+                /* two trail bytes: incomplete only behind a valid 4-byte lead */ \
+                uint8_t __b3=(s)[(length)-3]; \
+                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
+                    (length)-=3; \
+                } \
+            } \
+        } \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with backward iteration -------------------------------------- */
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_PREV
+ * @stable ICU 2.4
+ */
+#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+    /* step back one byte; anything that is not a trail byte is returned as is */ \
+    (c)=(uint8_t)(s)[--(i)]; \
+    if(U8_IS_TRAIL(c)) { \
+        /* __count = trail bytes seen so far, __shift = bit position for the next byte */ \
+        uint8_t __b, __count=1, __shift=6; \
+\
+        /* c is a trail byte */ \
+        (c)&=0x3f; \
+        /* keep stepping back until a byte >=0xc0 ends the sequence; */ \
+        /* there is no lower bound check on i */ \
+        for(;;) { \
+            __b=(s)[--(i)]; \
+            if(__b>=0xc0) { \
+                U8_MASK_LEAD_BYTE(__b, __count); \
+                (c)|=(UChar32)__b<<__shift; \
+                break; \
+            } else { \
+                (c)|=(UChar32)(__b&0x3f)<<__shift; \
+                ++__count; \
+                __shift+=6; \
+            } \
+        } \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_PREV_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+    /* step back one byte; an ASCII byte is the result as is */ \
+    (c)=(uint8_t)(s)[--(i)]; \
+    if(!U8_IS_SINGLE(c)) { \
+        /* s is parenthesized so that the cast applies to the whole argument */ \
+        /* expression; the helper may move i further back and returns the */ \
+        /* value stored in c (strict=-1 for this macro) */ \
+        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -1); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_PREV() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_PREV
+ * @stable ICU 51
+ */
+#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+    /* step back one byte; an ASCII byte is the result as is */ \
+    (c)=(uint8_t)(s)[--(i)]; \
+    if(!U8_IS_SINGLE(c)) { \
+        /* s is parenthesized so that the cast applies to the whole argument */ \
+        /* expression; the helper may move i further back and returns the */ \
+        /* value stored in c (strict=-3 for this macro) */ \
+        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -3); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_BACK_1
+ * @stable ICU 2.4
+ */
+#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+    /* always step back at least once, then keep going while on a trail byte */ \
+    do { \
+        --(i); \
+    } while(U8_IS_TRAIL((s)[i])); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @see U8_BACK_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+    /* i is decremented unconditionally; only when the byte now at i is a */ \
+    /* trail byte does the out-of-line helper pick the final offset */ \
+    if(U8_IS_TRAIL((s)[--(i)])) { \
+        (i)=utf8_back1SafeBody(s, start, (i)); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_BACK_N
+ * @stable ICU 2.4
+ */
+#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+    /* n is evaluated once; each pass moves back exactly one code point */ \
+    int32_t __N; \
+    for(__N=(n); __N>0; --__N) { \
+        U8_BACK_1_UNSAFE(s, i); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t index of the start of the string
+ * @param i int32_t string offset, must be start<i
+ * @param n number of code points to skip
+ * @see U8_BACK_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+    /* n is evaluated once; the loop ends early when i reaches start */ \
+    int32_t __N; \
+    for(__N=(n); __N>0 && (i)>(start); --__N) { \
+        U8_BACK_1(s, start, i); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_LIMIT
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+    /* go to the start of the code point that ends at or spans i, */ \
+    /* then skip forward over all of it */ \
+    U8_BACK_1_UNSAFE(s, i); \
+    U8_FWD_1_UNSAFE(s, i); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<=i<=length
+ * @param length int32_t string length
+ * @see U8_SET_CP_LIMIT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+    /* an offset at start, or at/after a non-negative length, is left unmodified */ \
+    if((start)<(i) && ((i)<(length) || (length)<0)) { \
+        /* back to the start of the code point before i, then forward past all of it */ \
+        U8_BACK_1(s, start, i); \
+        U8_FWD_1(s, i, length); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+#endif
				`@ -0,0 +1 @@`
				// This file must exist in order for `utf8.h` and `utf16.h` to be used.