summaryrefslogtreecommitdiff
path: root/src/treesit.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/treesit.c')
-rw-r--r--src/treesit.c803
1 files changed, 599 insertions, 204 deletions
diff --git a/src/treesit.c b/src/treesit.c
index 5a4fe3e8803..d2dd83b29fe 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -421,10 +421,17 @@ static Lisp_Object Vtreesit_str_match;
static Lisp_Object Vtreesit_str_pred;
/* This is the limit on recursion levels for some tree-sitter
- functions. Remember to update docstrings when changing this
- value. */
-const ptrdiff_t treesit_recursion_limit = 1000;
-bool treesit_initialized = false;
+ functions. Remember to update docstrings when changing this value.
+
+ If we think of programs and AST, it is very rare for any program to
+ have a very deep AST. For example, you would need 1000+ levels of
+ nested if-statements, or a struct somehow nested for 1000+ levels.
+ It’s hard for me to imagine any hand-written or machine generated
+ program to be like that. So I think 1000 is already generous. If
+ we look at xdisp.c, its AST only have 30 levels. */
+#define TREESIT_RECURSION_LIMIT 1000
+
+static bool treesit_initialized = false;
static bool
load_tree_sitter_if_necessary (bool required)
@@ -478,40 +485,47 @@ treesit_initialize (void)
static void
treesit_symbol_to_c_name (char *symbol_name)
{
- for (int idx = 0; idx < strlen (symbol_name); idx++)
+ size_t len = strlen (symbol_name);
+ for (int idx = 0; idx < len; idx++)
{
if (symbol_name[idx] == '-')
symbol_name[idx] = '_';
}
}
+/* Find the override name for LANGUAGE_SYMBOL in
+ treesit-load-name-override-list. Set NAME and C_SYMBOL to the
+ override name, and return true if there exists one, otherwise
+ return false.
+
+ This function may signal if treesit-load-name-override-list is
+ malformed. */
static bool
treesit_find_override_name (Lisp_Object language_symbol, Lisp_Object *name,
Lisp_Object *c_symbol)
{
- Lisp_Object tem;
-
CHECK_LIST (Vtreesit_load_name_override_list);
+ Lisp_Object tail = Vtreesit_load_name_override_list;
- tem = Vtreesit_load_name_override_list;
-
- FOR_EACH_TAIL (tem)
+ FOR_EACH_TAIL (tail)
{
- Lisp_Object lang = XCAR (XCAR (tem));
+ Lisp_Object entry = XCAR (tail);
+ CHECK_LIST (entry);
+ Lisp_Object lang = XCAR (entry);
CHECK_SYMBOL (lang);
if (EQ (lang, language_symbol))
{
- *name = Fnth (make_fixnum (1), XCAR (tem));
+ *name = Fnth (make_fixnum (1), entry);
CHECK_STRING (*name);
- *c_symbol = Fnth (make_fixnum (2), XCAR (tem));
+ *c_symbol = Fnth (make_fixnum (2), entry);
CHECK_STRING (*c_symbol);
return true;
}
}
- CHECK_LIST_END (tem, Vtreesit_load_name_override_list);
+ CHECK_LIST_END (tail, Vtreesit_load_name_override_list);
return false;
}
@@ -1016,11 +1030,6 @@ treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree,
static void
treesit_ensure_parsed (Lisp_Object parser)
{
- /* Make sure this comes before everything else, see comment
- (ref:notifier-inside-ensure-parsed) for more detail. */
- if (!XTS_PARSER (parser)->need_reparse)
- return;
-
struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
/* Before we parse, catch up with the narrowing situation. */
@@ -1029,6 +1038,11 @@ treesit_ensure_parsed (Lisp_Object parser)
because it might set the flag to true. */
treesit_sync_visible_region (parser);
+ /* Make sure this comes before everything else, see comment
+ (ref:notifier-inside-ensure-parsed) for more detail. */
+ if (!XTS_PARSER (parser)->need_reparse)
+ return;
+
TSParser *treesit_parser = XTS_PARSER (parser)->parser;
TSTree *tree = XTS_PARSER (parser)->tree;
TSInput input = XTS_PARSER (parser)->input;
@@ -1619,6 +1633,9 @@ buffer. */)
TSRange *treesit_ranges = xmalloc (sizeof (TSRange) * len);
struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+ /* We can use XFUXNUM, XCAR, XCDR freely because we have checked
+ the input by treesit_check_range_argument. */
+
for (int idx = 0; !NILP (ranges); idx++, ranges = XCDR (ranges))
{
Lisp_Object range = XCAR (ranges);
@@ -1639,9 +1656,6 @@ buffer. */)
}
success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
treesit_ranges, len);
- /* Although XFIXNUM could signal, it should be impossible
- because we have checked the input by treesit_check_range_argument.
- So there is no need for unwind-protect. */
xfree (treesit_ranges);
}
@@ -1962,19 +1976,19 @@ live. */)
TSNode treesit_node = XTS_NODE (node)->node;
bool result;
- if (EQ (property, Qoutdated))
+ if (BASE_EQ (property, Qoutdated))
return treesit_node_uptodate_p (node) ? Qnil : Qt;
treesit_check_node (node);
- if (EQ (property, Qnamed))
+ if (BASE_EQ (property, Qnamed))
result = ts_node_is_named (treesit_node);
- else if (EQ (property, Qmissing))
+ else if (BASE_EQ (property, Qmissing))
result = ts_node_is_missing (treesit_node);
- else if (EQ (property, Qextra))
+ else if (BASE_EQ (property, Qextra))
result = ts_node_is_extra (treesit_node);
- else if (EQ (property, Qhas_error))
+ else if (BASE_EQ (property, Qhas_error))
result = ts_node_has_error (treesit_node);
- else if (EQ (property, Qlive))
+ else if (BASE_EQ (property, Qlive))
result = treesit_parser_live_p (XTS_NODE (node)->parser);
else
signal_error ("Expecting `named', `missing', `extra', "
@@ -2293,19 +2307,19 @@ PATTERN can be
See Info node `(elisp)Pattern Matching' for detailed explanation. */)
(Lisp_Object pattern)
{
- if (EQ (pattern, QCanchor))
+ if (BASE_EQ (pattern, QCanchor))
return Vtreesit_str_dot;
- if (EQ (pattern, intern_c_string (":?")))
+ if (BASE_EQ (pattern, QCquestion))
return Vtreesit_str_question_mark;
- if (EQ (pattern, intern_c_string (":*")))
+ if (BASE_EQ (pattern, QCstar))
return Vtreesit_str_star;
- if (EQ (pattern, intern_c_string (":+")))
+ if (BASE_EQ (pattern, QCplus))
return Vtreesit_str_plus;
- if (EQ (pattern, QCequal))
+ if (BASE_EQ (pattern, QCequal))
return Vtreesit_str_pound_equal;
- if (EQ (pattern, QCmatch))
+ if (BASE_EQ (pattern, QCmatch))
return Vtreesit_str_pound_match;
- if (EQ (pattern, QCpred))
+ if (BASE_EQ (pattern, QCpred))
return Vtreesit_str_pound_pred;
Lisp_Object opening_delimeter
= VECTORP (pattern)
@@ -2407,87 +2421,111 @@ treesit_predicates_for_pattern (TSQuery *query, uint32_t pattern_index)
return Fnreverse (result);
}
-/* Translate a capture NAME (symbol) to a node.
- Signals treesit-query-error if such node is not captured. */
-static Lisp_Object
+/* Translate a capture NAME (symbol) to a node. If everything goes
+ fine, set NODE and return true; if error occurs (e.g., when there
+ is no node for the capture name), set NODE to Qnil, SIGNAL_DATA to
+ a suitable signal data, and return false. */
+static bool
treesit_predicate_capture_name_to_node (Lisp_Object name,
- struct capture_range captures)
+ struct capture_range captures,
+ Lisp_Object *node,
+ Lisp_Object *signal_data)
{
- Lisp_Object node = Qnil;
+ *node = Qnil;
for (Lisp_Object tail = captures.start; !EQ (tail, captures.end);
tail = XCDR (tail))
{
if (EQ (XCAR (XCAR (tail)), name))
{
- node = XCDR (XCAR (tail));
+ *node = XCDR (XCAR (tail));
break;
}
}
- if (NILP (node))
- xsignal3 (Qtreesit_query_error,
- build_string ("Cannot find captured node"),
- name, build_string ("A predicate can only refer"
- " to captured nodes in the "
- "same pattern"));
- return node;
+ if (NILP (*node))
+ {
+ *signal_data = list3 (build_string ("Cannot find captured node"),
+ name, build_string ("A predicate can only refer"
+ " to captured nodes in the "
+ "same pattern"));
+ return false;
+ }
+ return true;
}
/* Translate a capture NAME (symbol) to the text of the captured node.
- Signals treesit-query-error if such node is not captured. */
-static Lisp_Object
+ If everything goes fine, set TEXT to the text and return true;
+ otherwise set TEXT to Qnil and set SIGNAL_DATA to a suitable signal
+ data. */
+static bool
treesit_predicate_capture_name_to_text (Lisp_Object name,
- struct capture_range captures)
+ struct capture_range captures,
+ Lisp_Object *text,
+ Lisp_Object *signal_data)
{
- Lisp_Object node = treesit_predicate_capture_name_to_node (name, captures);
+ Lisp_Object node = Qnil;
+ if (!treesit_predicate_capture_name_to_node (name, captures, &node, signal_data))
+ return false;
struct buffer *old_buffer = current_buffer;
set_buffer_internal (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer));
- Lisp_Object text = Fbuffer_substring (Ftreesit_node_start (node),
- Ftreesit_node_end (node));
+ *text = Fbuffer_substring (Ftreesit_node_start (node),
+ Ftreesit_node_end (node));
set_buffer_internal (old_buffer);
- return text;
+ return true;
}
/* Handles predicate (#equal A B). Return true if A equals B; return
false otherwise. A and B can be either string, or a capture name.
The capture name evaluates to the text its captured node spans in
- the buffer. */
+ the buffer. If everything goes fine, don't touch SIGNAL_DATA; if
+ error occurs, set it to a suitable signal data. */
static bool
-treesit_predicate_equal (Lisp_Object args, struct capture_range captures)
+treesit_predicate_equal (Lisp_Object args, struct capture_range captures,
+ Lisp_Object *signal_data)
{
if (XFIXNUM (Flength (args)) != 2)
- xsignal2 (Qtreesit_query_error,
- build_string ("Predicate `equal' requires "
- "two arguments but only given"),
- Flength (args));
-
+ {
+ *signal_data = list2 (build_string ("Predicate `equal' requires "
+ "two arguments but only given"),
+ Flength (args));
+ return false;
+ }
Lisp_Object arg1 = XCAR (args);
Lisp_Object arg2 = XCAR (XCDR (args));
- Lisp_Object text1 = (STRINGP (arg1)
- ? arg1
- : treesit_predicate_capture_name_to_text (arg1,
- captures));
- Lisp_Object text2 = (STRINGP (arg2)
- ? arg2
- : treesit_predicate_capture_name_to_text (arg2,
- captures));
+ Lisp_Object text1 = arg1;
+ Lisp_Object text2 = arg2;
+ if (SYMBOLP (arg1))
+ {
+ if (!treesit_predicate_capture_name_to_text (arg1, captures, &text1,
+ signal_data))
+ return false;
+ }
+ if (SYMBOLP (arg2))
+ {
+ if (!treesit_predicate_capture_name_to_text (arg2, captures, &text2,
+ signal_data))
+ return false;
+ }
return !NILP (Fstring_equal (text1, text2));
}
/* Handles predicate (#match "regexp" @node). Return true if "regexp"
- matches the text spanned by @node; return false otherwise. Matching
- is case-sensitive. */
+ matches the text spanned by @node; return false otherwise.
+ Matching is case-sensitive. If everything goes fine, don't touch
+ SIGNAL_DATA; if error occurs, set it to a suitable signal data. */
static bool
-treesit_predicate_match (Lisp_Object args, struct capture_range captures)
+treesit_predicate_match (Lisp_Object args, struct capture_range captures,
+ Lisp_Object *signal_data)
{
if (XFIXNUM (Flength (args)) != 2)
- xsignal2 (Qtreesit_query_error,
- build_string ("Predicate `match' requires two "
- "arguments but only given"),
- Flength (args));
-
+ {
+ *signal_data = list2 (build_string ("Predicate `match' requires two "
+ "arguments but only given"),
+ Flength (args));
+ return false;
+ }
Lisp_Object regexp = XCAR (args);
Lisp_Object capture_name = XCAR (XCDR (args));
@@ -2504,12 +2542,10 @@ treesit_predicate_match (Lisp_Object args, struct capture_range captures)
build_string ("The second argument to `match' should "
"be a capture name, not a string"));
- Lisp_Object node = treesit_predicate_capture_name_to_node (capture_name,
- captures);
-
- struct buffer *old_buffer = current_buffer;
- struct buffer *buffer = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
- set_buffer_internal (buffer);
+ Lisp_Object node = Qnil;
+ if (!treesit_predicate_capture_name_to_node (capture_name, captures, &node,
+ signal_data))
+ return false;
TSNode treesit_node = XTS_NODE (node)->node;
ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
@@ -2537,61 +2573,71 @@ treesit_predicate_match (Lisp_Object args, struct capture_range captures)
ZV = old_zv;
ZV_BYTE = old_zv_byte;
- set_buffer_internal (old_buffer);
-
return (val > 0);
}
/* Handles predicate (#pred FN ARG...). Return true if FN returns
non-nil; return false otherwise. The arity of FN must match the
- number of ARGs */
+ number of ARGs. If everything goes fine, don't touch SIGNAL_DATA;
+ if error occurs, set it to a suitable signal data. */
static bool
-treesit_predicate_pred (Lisp_Object args, struct capture_range captures)
+treesit_predicate_pred (Lisp_Object args, struct capture_range captures,
+ Lisp_Object *signal_data)
{
if (XFIXNUM (Flength (args)) < 2)
- xsignal2 (Qtreesit_query_error,
- build_string ("Predicate `pred' requires "
- "at least two arguments, "
- "but was only given"),
- Flength (args));
+ {
+ *signal_data = list2 (build_string ("Predicate `pred' requires "
+ "at least two arguments, "
+ "but was only given"),
+ Flength (args));
+ return false;
+ }
Lisp_Object fn = Fintern (XCAR (args), Qnil);
Lisp_Object nodes = Qnil;
Lisp_Object tail = XCDR (args);
FOR_EACH_TAIL (tail)
- nodes = Fcons (treesit_predicate_capture_name_to_node (XCAR (tail),
- captures),
- nodes);
+ {
+ Lisp_Object node = Qnil;
+ if (!treesit_predicate_capture_name_to_node (XCAR (tail), captures, &node,
+ signal_data))
+ return false;
+ nodes = Fcons (node, nodes);
+ }
nodes = Fnreverse (nodes);
return !NILP (CALLN (Fapply, fn, nodes));
}
/* If all predicates in PREDICATES passes, return true; otherwise
- return false. */
+ return false. If everything goes fine, don't touch SIGNAL_DATA; if
+ error occurs, set it to a suitable signal data. */
static bool
-treesit_eval_predicates (struct capture_range captures, Lisp_Object predicates)
+treesit_eval_predicates (struct capture_range captures, Lisp_Object predicates,
+ Lisp_Object *signal_data)
{
bool pass = true;
/* Evaluate each predicates. */
for (Lisp_Object tail = predicates;
- !NILP (tail); tail = XCDR (tail))
+ pass && !NILP (tail); tail = XCDR (tail))
{
Lisp_Object predicate = XCAR (tail);
Lisp_Object fn = XCAR (predicate);
Lisp_Object args = XCDR (predicate);
if (!NILP (Fstring_equal (fn, Vtreesit_str_equal)))
- pass &= treesit_predicate_equal (args, captures);
+ pass &= treesit_predicate_equal (args, captures, signal_data);
else if (!NILP (Fstring_equal (fn, Vtreesit_str_match)))
- pass &= treesit_predicate_match (args, captures);
+ pass &= treesit_predicate_match (args, captures, signal_data);
else if (!NILP (Fstring_equal (fn, Vtreesit_str_pred)))
- pass &= treesit_predicate_pred (args, captures);
+ pass &= treesit_predicate_pred (args, captures, signal_data);
else
- xsignal3 (Qtreesit_query_error,
- build_string ("Invalid predicate"),
- fn, build_string ("Currently Emacs only supports"
- " equal, match, and pred"
- " predicate"));
+ {
+ *signal_data = list3 (build_string ("Invalid predicate"),
+ fn, build_string ("Currently Emacs only supports"
+ " equal, match, and pred"
+ " predicates"));
+ pass = false;
+ }
}
/* If all predicates passed, add captures to result list. */
return pass;
@@ -2631,8 +2677,8 @@ You can use `treesit-query-validate' to validate and debug a query. */)
Lisp_Object signal_symbol = Qnil;
Lisp_Object signal_data = Qnil;
TSQuery *treesit_query = treesit_ensure_query_compiled (lisp_query,
- &signal_symbol,
- &signal_data);
+ &signal_symbol,
+ &signal_data);
if (treesit_query == NULL)
xsignal (signal_symbol, signal_data);
@@ -2641,6 +2687,92 @@ You can use `treesit-query-validate' to validate and debug a query. */)
}
}
+/* Resolve OBJ into a tree-sitter node Lisp_Object. OBJ can be a
+ node, a parser, or a language symbol. Note that this function can
+ signal. */
+static Lisp_Object treesit_resolve_node (Lisp_Object obj)
+{
+ if (TS_NODEP (obj))
+ {
+ treesit_check_node (obj); /* Check if up-to-date. */
+ return obj;
+ }
+ else if (TS_PARSERP (obj))
+ {
+ treesit_check_parser (obj); /* Check if deleted. */
+ return Ftreesit_parser_root_node (obj);
+ }
+ else if (SYMBOLP (obj))
+ {
+ Lisp_Object parser
+ = Ftreesit_parser_create (obj, Fcurrent_buffer (), Qnil);
+ return Ftreesit_parser_root_node (parser);
+ }
+ else
+ xsignal2 (Qwrong_type_argument,
+ list4 (Qor, Qtreesit_node_p, Qtreesit_parser_p, Qsymbolp),
+ obj);
+}
+
+/* Create and initialize QUERY. When success, initialize TS_QUERY,
+ CURSOR, and NEED_FREE, and return true; if failed, initialize
+ SIGNAL_SYMBOL and SIGNAL_DATA, and return false. If NEED_FREE is
+ initialized to true, the TS_QUERY and CURSOR needs to be freed
+ after use; otherwise they shouldn't be freed by hand.
+
+ Basically this function looks at QUERY and check its type, if QUERY
+ is a compiled query, this function takes out its query and cursor;
+ if QUERY is a string or a cons, this function creates a new query
+ and cursor (so they need to be manually freed).
+
+ This function assumes QUERY is either a compiled query, a string or
+ a cons, the caller should make sure QUERY is valid.
+
+ LANG is the language to use if we need to create the query and
+ cursor. */
+static bool
+treesit_initialize_query (Lisp_Object query, const TSLanguage *lang,
+ TSQuery **ts_query, TSQueryCursor **cursor,
+ bool *need_free, Lisp_Object *signal_symbol,
+ Lisp_Object *signal_data)
+{
+ if (TS_COMPILED_QUERY_P (query))
+ {
+ *ts_query = treesit_ensure_query_compiled (query, signal_symbol,
+ signal_data);
+ *cursor = XTS_COMPILED_QUERY (query)->cursor;
+ /* We don't need to free ts_query and cursor because they
+ are stored in a lisp object, which is tracked by gc. */
+ *need_free = false;
+ return (*ts_query != NULL);
+ }
+ else
+ {
+ /* Since query is not TS_COMPILED_QUERY, it can only be a string
+ or a cons. */
+ if (CONSP (query))
+ query = Ftreesit_query_expand (query);
+ char *query_string = SSDATA (query);
+ uint32_t error_offset;
+ TSQueryError error_type;
+ *ts_query = ts_query_new (lang, query_string, strlen (query_string),
+ &error_offset, &error_type);
+ if (*ts_query == NULL)
+ {
+ *signal_symbol = Qtreesit_query_error;
+ *signal_data = treesit_compose_query_signal_data (error_offset,
+ error_type, query);
+ return false;
+ }
+ else
+ {
+ *cursor = ts_query_cursor_new ();
+ *need_free = true;
+ return true;
+ }
+ }
+}
+
DEFUN ("treesit-query-capture",
Ftreesit_query_capture,
Streesit_query_capture, 2, 5, 0,
@@ -2681,35 +2813,12 @@ the query. */)
treesit_initialize ();
/* Resolve NODE into an actual node. */
- Lisp_Object lisp_node;
- if (TS_NODEP (node))
- {
- treesit_check_node (node); /* Check if up-to-date. */
- lisp_node = node;
- }
- else if (TS_PARSERP (node))
- {
- treesit_check_parser (node); /* Check if deleted. */
- lisp_node = Ftreesit_parser_root_node (node);
- }
- else if (SYMBOLP (node))
- {
- Lisp_Object parser
- = Ftreesit_parser_create (node, Fcurrent_buffer (), Qnil);
- lisp_node = Ftreesit_parser_root_node (parser);
- }
- else
- xsignal2 (Qwrong_type_argument,
- list4 (Qor, Qtreesit_node_p, Qtreesit_parser_p, Qsymbolp),
- node);
+ Lisp_Object lisp_node = treesit_resolve_node (node);
/* Extract C values from Lisp objects. */
- TSNode treesit_node
- = XTS_NODE (lisp_node)->node;
- Lisp_Object lisp_parser
- = XTS_NODE (lisp_node)->parser;
- ptrdiff_t visible_beg
- = XTS_PARSER (XTS_NODE (lisp_node)->parser)->visible_beg;
+ TSNode treesit_node = XTS_NODE (lisp_node)->node;
+ Lisp_Object lisp_parser = XTS_NODE (lisp_node)->parser;
+
const TSLanguage *lang
= ts_parser_language (XTS_PARSER (lisp_parser)->parser);
@@ -2725,44 +2834,21 @@ the query. */)
TSQuery *treesit_query;
TSQueryCursor *cursor;
bool needs_to_free_query_and_cursor;
- if (TS_COMPILED_QUERY_P (query))
- {
- Lisp_Object signal_symbol = Qnil;
- Lisp_Object signal_data = Qnil;
- treesit_query = treesit_ensure_query_compiled (query, &signal_symbol,
- &signal_data);
- cursor = XTS_COMPILED_QUERY (query)->cursor;
- /* We don't need to free ts_query and cursor because they
- are stored in a lisp object, which is tracked by gc. */
- needs_to_free_query_and_cursor = false;
- if (treesit_query == NULL)
- xsignal (signal_symbol, signal_data);
- }
- else
- {
- /* Since query is not TS_COMPILED_QUERY, it can only be a string
- or a cons. */
- if (CONSP (query))
- query = Ftreesit_query_expand (query);
- char *query_string = SSDATA (query);
- uint32_t error_offset;
- TSQueryError error_type;
- treesit_query = ts_query_new (lang, query_string, strlen (query_string),
- &error_offset, &error_type);
- if (treesit_query == NULL)
- xsignal (Qtreesit_query_error,
- treesit_compose_query_signal_data (error_offset,
- error_type, query));
- cursor = ts_query_cursor_new ();
- needs_to_free_query_and_cursor = true;
- }
+ Lisp_Object signal_symbol;
+ Lisp_Object signal_data;
+ if (!treesit_initialize_query (query, lang, &treesit_query, &cursor,
+ &needs_to_free_query_and_cursor,
+ &signal_symbol, &signal_data))
+ xsignal (signal_symbol, signal_data);
- /* WARN: After this point, free treesit_query and cursor before every
- signal and return. */
+ /* WARN: After this point, free TREESIT_QUERY and CURSOR before every
+ signal and return if NEEDS_TO_FREE_QUERY_AND_CURSOR is true. */
/* Set query range. */
if (!NILP (beg) && !NILP (end))
{
+ ptrdiff_t visible_beg
+ = XTS_PARSER (XTS_NODE (lisp_node)->parser)->visible_beg;
ptrdiff_t beg_byte = CHAR_TO_BYTE (XFIXNUM (beg));
ptrdiff_t end_byte = CHAR_TO_BYTE (XFIXNUM (end));
/* We never let tree-sitter run on buffers too large, so these
@@ -2791,11 +2877,16 @@ the query. */)
Lisp_Object result = Qnil;
Lisp_Object prev_result = result;
Lisp_Object predicates_table = make_vector (patterns_count, Qt);
+ Lisp_Object predicate_signal_data = Qnil;
+
+ struct buffer *old_buf = current_buffer;
+ set_buffer_internal (buf);
+
while (ts_query_cursor_next_match (cursor, &match))
{
/* Record the checkpoint that we may roll back to. */
prev_result = result;
- /* Get captured nodes. */
+ /* 1. Get captured nodes. */
const TSQueryCapture *captures = match.captures;
for (int idx = 0; idx < match.capture_count; idx++)
{
@@ -2818,9 +2909,10 @@ the query. */)
result = Fcons (cap, result);
}
- /* Get predicates. */
+ /* 2. Get predicates and check whether this match can be
+ included in the result list. */
Lisp_Object predicates = AREF (predicates_table, match.pattern_index);
- if (EQ (predicates, Qt))
+ if (BASE_EQ (predicates, Qt))
{
predicates = treesit_predicates_for_pattern (treesit_query,
match.pattern_index);
@@ -2829,15 +2921,28 @@ the query. */)
/* captures_lisp = Fnreverse (captures_lisp); */
struct capture_range captures_range = { result, prev_result };
- if (!treesit_eval_predicates (captures_range, predicates))
- /* Predicates didn't pass, roll back. */
+ bool match = treesit_eval_predicates (captures_range, predicates,
+ &predicate_signal_data);
+ if (!NILP (predicate_signal_data))
+ break;
+
+ /* Predicates didn't pass, roll back. */
+ if (!match)
result = prev_result;
}
+
+ /* Final clean up. */
if (needs_to_free_query_and_cursor)
{
ts_query_delete (treesit_query);
ts_query_cursor_delete (cursor);
}
+ set_buffer_internal (old_buf);
+
+ /* Some capture predicate signaled an error. */
+ if (!NILP (predicate_signal_data))
+ xsignal (Qtreesit_query_error, predicate_signal_data);
+
return Fnreverse (result);
}
@@ -2917,7 +3022,7 @@ treesit_cursor_helper (TSTreeCursor *cursor, TSNode node, Lisp_Object parser)
TSNode root = ts_tree_root_node (XTS_PARSER (parser)->tree);
*cursor = ts_tree_cursor_new (root);
bool success = treesit_cursor_helper_1 (cursor, &node, end_pos,
- treesit_recursion_limit);
+ TREESIT_RECURSION_LIMIT);
if (!success)
ts_tree_cursor_delete (cursor);
return success;
@@ -3048,10 +3153,147 @@ treesit_traverse_child_helper (TSTreeCursor *cursor,
}
}
-/* Return true if the node at CURSOR matches PRED. PRED can be a
- string or a function. This function assumes PRED is either a
- string or a function. If NAMED is true, also check that the node
- is named. */
+/* Assq but doesn't signal. */
+static Lisp_Object
+safe_assq (Lisp_Object key, Lisp_Object alist)
+{
+ Lisp_Object tail = alist;
+ FOR_EACH_TAIL_SAFE (tail)
+ if (CONSP (XCAR (tail)) && EQ (XCAR (XCAR (tail)), key))
+ return XCAR (tail);
+ return Qnil;
+}
+
+/* Given a symbol THING, and a language symbol LANGUAGE, find the
+ corresponding predicate definition in treesit-things-settings.
+ Don't check for the type of THING and LANGUAGE.
+
+ If there isn't one, return Qnil. */
+static Lisp_Object
+treesit_traverse_get_predicate (Lisp_Object thing, Lisp_Object language)
+{
+ Lisp_Object cons = safe_assq (language, Vtreesit_thing_settings);
+ if (NILP (cons))
+ return Qnil;
+ Lisp_Object definitions = XCDR (cons);
+ Lisp_Object entry = safe_assq (thing, definitions);
+ if (NILP (entry))
+ return Qnil;
+ /* ENTRY looks like (THING PRED). */
+ Lisp_Object cdr = XCDR (entry);
+ if (!CONSP (cdr))
+ return Qnil;
+ return XCAR (cdr);
+}
+
+/* Validate the PRED passed to treesit_traverse_match_predicate. If
+ there's an error, set SIGNAL_DATA to something signal accepts, and
+ return false, otherwise return true. This function also check for
+ recusion levels: we place a arbitrary 100 level limit on recursive
+ predicates. RECURSION_LEVEL is the current recursion level (that
+ starts at 0), if it goes over 99, return false and set
+ SIGNAL_DATA. LANGUAGE is a LANGUAGE symbol. */
+static bool
+treesit_traverse_validate_predicate (Lisp_Object pred,
+ Lisp_Object language,
+ Lisp_Object *signal_data,
+ ptrdiff_t recursion_level)
+{
+ if (recursion_level > 99)
+ {
+ *signal_data = list1 (build_string ("Predicate recursion level "
+ "exceeded: it must not exceed "
+ "100 levels"));
+ return false;
+ }
+ if (STRINGP (pred))
+ return true;
+ else if (FUNCTIONP (pred))
+ return true;
+ else if (SYMBOLP (pred))
+ {
+ Lisp_Object definition = treesit_traverse_get_predicate (pred,
+ language);
+ if (NILP (definition))
+ {
+ *signal_data = list2 (build_string ("Cannot find the definition "
+ "of the predicate in "
+ "`treesit-things-settings'"),
+ pred);
+ return false;
+ }
+ return treesit_traverse_validate_predicate (definition,
+ language,
+ signal_data,
+ recursion_level + 1);
+ }
+ else if (CONSP (pred))
+ {
+ Lisp_Object car = XCAR (pred);
+ Lisp_Object cdr = XCDR (pred);
+ if (BASE_EQ (car, Qnot))
+ {
+ if (!CONSP (cdr))
+ {
+ *signal_data = list2 (build_string ("Invalide `not' "
+ "predicate"),
+ pred);
+ return false;
+ }
+ /* At this point CDR must be a cons. */
+ if (XFIXNUM (Flength (cdr)) != 1)
+ {
+ *signal_data = list2 (build_string ("`not' can only "
+ "have one argument"),
+ pred);
+ return false;
+ }
+ return treesit_traverse_validate_predicate (XCAR (cdr),
+ language,
+ signal_data,
+ recursion_level + 1);
+ }
+ else if (BASE_EQ (car, Qor))
+ {
+ if (!CONSP (cdr) || NILP (cdr))
+ {
+ *signal_data = list2 (build_string ("`or' must have a list "
+ "of patterns as "
+ "arguments "),
+ pred);
+ return false;
+ }
+ FOR_EACH_TAIL (cdr)
+ {
+ if (!treesit_traverse_validate_predicate (XCAR (cdr),
+ language,
+ signal_data,
+ recursion_level + 1))
+ return false;
+ }
+ return true;
+ }
+ else if (STRINGP (car) && FUNCTIONP (cdr))
+ return true;
+ }
+ *signal_data = list2 (build_string ("Invalid predicate, see `treesit-thing-settings' for valid forms of predicate"),
+ pred);
+ return false;
+}
+
+/* Return true if the node at CURSOR matches PRED. PRED can be a lot
+ of things:
+
+ PRED := string | function | (string . function)
+ | (or PRED...) | (not PRED)
+
+ See docstring of treesit-search-forward and friends for the meaning
+ of each shape.
+
+ This function assumes PRED is in one of its valid forms. If NAMED
+ is true, also check that the node is named.
+
+ This function may signal if the predicate function signals. */
static bool
treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred,
Lisp_Object parser, bool named)
@@ -3065,24 +3307,67 @@ treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred,
const char *type = ts_node_type (node);
return fast_c_string_match (pred, type, strlen (type)) >= 0;
}
- else
+ else if (FUNCTIONP (pred))
{
Lisp_Object lisp_node = make_treesit_node (parser, node);
return !NILP (CALLN (Ffuncall, pred, lisp_node));
}
+ else if (SYMBOLP (pred))
+ {
+ Lisp_Object language = XTS_PARSER (parser)->language_symbol;
+ Lisp_Object definition = treesit_traverse_get_predicate (pred,
+ language);
+ return treesit_traverse_match_predicate (cursor, definition,
+ parser, named);
+ }
+ else if (CONSP (pred))
+ {
+ Lisp_Object car = XCAR (pred);
+ Lisp_Object cdr = XCDR (pred);
+
+ if (BASE_EQ (car, Qnot))
+ return !treesit_traverse_match_predicate (cursor, XCAR (cdr),
+ parser, named);
+ else if (BASE_EQ (car, Qor))
+ {
+ FOR_EACH_TAIL (cdr)
+ {
+ if (treesit_traverse_match_predicate (cursor, XCAR (cdr),
+ parser, named))
+ return true;
+ }
+ return false;
+ }
+ else if (STRINGP (car) && FUNCTIONP (cdr))
+ {
+ /* A bit of code duplication here, but should be fine. */
+ const char *type = ts_node_type (node);
+ if (!(fast_c_string_match (car, type, strlen (type)) >= 0))
+ return false;
+
+ Lisp_Object lisp_node = make_treesit_node (parser, node);
+ if (NILP (CALLN (Ffuncall, cdr, lisp_node)))
+ return false;
+
+ return true;
+ }
+ }
+ /* Returning false is better than UB. */
+ return false;
}
-/* Traverse the parse tree starting from CURSOR. PRED can be a
- function (takes a node and returns nil/non-nil), or a string
- (treated as regexp matching the node's type, must be all single
- byte characters). If the node satisfies PRED, leave CURSOR on that
- node and return true. If no node satisfies PRED, move CURSOR back
- to starting position and return false.
+/* Traverse the parse tree starting from CURSOR. See
+ `treesit-thing-settings' for the shapes PRED can have. If the
+ node satisfies PRED, leave CURSOR on that node and return true. If
+ no node satisfies PRED, move CURSOR back to starting position and
+ return false.
LIMIT is the number of levels we descend in the tree. FORWARD
controls the direction in which we traverse the tree, true means
forward, false backward. If SKIP_ROOT is true, don't match ROOT.
- */
+
+ This function may signal if the predicate function signals. */
+
static bool
treesit_search_dfs (TSTreeCursor *cursor,
Lisp_Object pred, Lisp_Object parser,
@@ -3118,7 +3403,10 @@ treesit_search_dfs (TSTreeCursor *cursor,
START. PRED, PARSER, NAMED, FORWARD are the same as in
ts_search_subtree. If a match is found, leave CURSOR at that node,
and return true, if no match is found, return false, and CURSOR's
- position is undefined. */
+ position is undefined.
+
+ This function may signal if the predicate function signals. */
+
static bool
treesit_search_forward (TSTreeCursor *cursor,
Lisp_Object pred, Lisp_Object parser,
@@ -3128,8 +3416,7 @@ treesit_search_forward (TSTreeCursor *cursor,
nodes. This way repeated call of this function traverses each
node in the tree once and only once:
- (while node (setq node (treesit-search-forward node)))
- */
+ (while node (setq node (treesit-search-forward node))) */
bool initial = true;
while (true)
{
@@ -3156,6 +3443,14 @@ treesit_search_forward (TSTreeCursor *cursor,
}
}
+/* Clean up the given tree cursor CURSOR. */
+
+static void
+treesit_traverse_cleanup_cursor (void *cursor)
+{
+ ts_tree_cursor_delete (cursor);
+}
+
DEFUN ("treesit-search-subtree",
Ftreesit_search_subtree,
Streesit_search_subtree, 2, 5, 0,
@@ -3175,14 +3470,12 @@ Return the first matched node, or nil if none matches. */)
Lisp_Object all, Lisp_Object depth)
{
CHECK_TS_NODE (node);
- CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
- list3 (Qor, Qstringp, Qfunctionp), predicate);
CHECK_SYMBOL (all);
CHECK_SYMBOL (backward);
/* We use a default limit of 1000. See bug#59426 for the
discussion. */
- ptrdiff_t the_limit = treesit_recursion_limit;
+ ptrdiff_t the_limit = TREESIT_RECURSION_LIMIT;
if (!NILP (depth))
{
CHECK_FIXNUM (depth);
@@ -3192,19 +3485,29 @@ Return the first matched node, or nil if none matches. */)
treesit_initialize ();
Lisp_Object parser = XTS_NODE (node)->parser;
+ Lisp_Object language = XTS_PARSER (parser)->language_symbol;
+
+ Lisp_Object signal_data = Qnil;
+ if (!treesit_traverse_validate_predicate (predicate, language,
+ &signal_data, 0))
+ xsignal1 (Qtreesit_invalid_predicate, signal_data);
+
Lisp_Object return_value = Qnil;
TSTreeCursor cursor;
if (!treesit_cursor_helper (&cursor, XTS_NODE (node)->node, parser))
return return_value;
+ specpdl_ref count = SPECPDL_INDEX ();
+ record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
+
if (treesit_search_dfs (&cursor, predicate, parser, NILP (backward),
NILP (all), the_limit, false))
{
TSNode node = ts_tree_cursor_current_node (&cursor);
return_value = make_treesit_node (parser, node);
}
- ts_tree_cursor_delete (&cursor);
- return return_value;
+
+ return unbind_to (count, return_value);
}
DEFUN ("treesit-search-forward",
@@ -3241,33 +3544,43 @@ always traverse leaf nodes first, then upwards. */)
Lisp_Object all)
{
CHECK_TS_NODE (start);
- CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
- list3 (Qor, Qstringp, Qfunctionp), predicate);
CHECK_SYMBOL (all);
CHECK_SYMBOL (backward);
treesit_initialize ();
Lisp_Object parser = XTS_NODE (start)->parser;
+ Lisp_Object language = XTS_PARSER (parser)->language_symbol;
+
+ Lisp_Object signal_data = Qnil;
+ if (!treesit_traverse_validate_predicate (predicate, language,
+ &signal_data, 0))
+ xsignal1 (Qtreesit_invalid_predicate, signal_data);
+
Lisp_Object return_value = Qnil;
TSTreeCursor cursor;
if (!treesit_cursor_helper (&cursor, XTS_NODE (start)->node, parser))
return return_value;
+ specpdl_ref count = SPECPDL_INDEX ();
+ record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
+
if (treesit_search_forward (&cursor, predicate, parser,
NILP (backward), NILP (all)))
{
TSNode node = ts_tree_cursor_current_node (&cursor);
return_value = make_treesit_node (parser, node);
}
- ts_tree_cursor_delete (&cursor);
- return return_value;
+
+ return unbind_to (count, return_value);
}
/* Recursively traverse the tree under CURSOR, and append the result
subtree to PARENT's cdr. See more in Ftreesit_induce_sparse_tree.
Note that the top-level children list is reversed, because
- reasons. */
+ reasons.
+
+ This function may signal if the predicate function signals. */
static void
treesit_build_sparse_tree (TSTreeCursor *cursor, Lisp_Object parent,
Lisp_Object pred, Lisp_Object process_fn,
@@ -3353,15 +3666,13 @@ a regexp. */)
Lisp_Object depth)
{
CHECK_TS_NODE (root);
- CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
- list3 (Qor, Qstringp, Qfunctionp), predicate);
if (!NILP (process_fn))
CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn);
/* We use a default limit of 1000. See bug#59426 for the
discussion. */
- ptrdiff_t the_limit = treesit_recursion_limit;
+ ptrdiff_t the_limit = TREESIT_RECURSION_LIMIT;
if (!NILP (depth))
{
CHECK_FIXNUM (depth);
@@ -3371,21 +3682,68 @@ a regexp. */)
treesit_initialize ();
Lisp_Object parser = XTS_NODE (root)->parser;
+ Lisp_Object language = XTS_PARSER (parser)->language_symbol;
+
+ Lisp_Object signal_data = Qnil;
+ if (!treesit_traverse_validate_predicate (predicate, language,
+ &signal_data, 0))
+ xsignal1 (Qtreesit_invalid_predicate, signal_data);
+
Lisp_Object parent = Fcons (Qnil, Qnil);
/* In this function we never traverse above NODE, so we don't need
to use treesit_cursor_helper. */
TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (root)->node);
+ specpdl_ref count = SPECPDL_INDEX ();
+ record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
+
treesit_build_sparse_tree (&cursor, parent, predicate, process_fn,
the_limit, parser);
- ts_tree_cursor_delete (&cursor);
+
+ unbind_to (count, Qnil);
+
Fsetcdr (parent, Fnreverse (Fcdr (parent)));
+
if (NILP (Fcdr (parent)))
return Qnil;
else
return parent;
}
+DEFUN ("treesit-node-match-p",
+ Ftreesit_node_match_p,
+ Streesit_node_match_p, 2, 2, 0,
+ doc: /* Check whether NODE matches PREDICATE.
+
+PREDICATE can be a regexp matching node type, a predicate function,
+and more, see `treesit-things-definition' for detail. Return non-nil
+if NODE matches PRED, nil otherwise. */)
+ (Lisp_Object node, Lisp_Object predicate)
+{
+ CHECK_TS_NODE (node);
+
+ Lisp_Object parser = XTS_NODE (node)->parser;
+ Lisp_Object language = XTS_PARSER (parser)->language_symbol;
+
+ Lisp_Object signal_data = Qnil;
+ if (!treesit_traverse_validate_predicate (predicate, language,
+ &signal_data, 0))
+ xsignal1 (Qtreesit_invalid_predicate, signal_data);
+
+ TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (node)->node);
+
+ specpdl_ref count = SPECPDL_INDEX ();
+ record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
+
+ bool match = false;
+ match = treesit_traverse_match_predicate (&cursor, predicate,
+ parser, false);
+
+ unbind_to (count, Qnil);
+
+ return match ? Qt : Qnil;
+}
+
DEFUN ("treesit-subtree-stat",
Ftreesit_subtree_stat,
Streesit_subtree_stat, 1, 1, 0,
@@ -3480,8 +3838,12 @@ syms_of_treesit (void)
DEFSYM (Qoutdated, "outdated");
DEFSYM (Qhas_error, "has-error");
DEFSYM (Qlive, "live");
+ DEFSYM (Qnot, "not");
DEFSYM (QCanchor, ":anchor");
+ DEFSYM (QCquestion, ":?");
+ DEFSYM (QCstar, ":*");
+ DEFSYM (QCplus, ":+");
DEFSYM (QCequal, ":equal");
DEFSYM (QCmatch, ":match");
DEFSYM (QCpred, ":pred");
@@ -3504,6 +3866,7 @@ syms_of_treesit (void)
"user-emacs-directory");
DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted");
DEFSYM (Qtreesit_pattern_expand, "treesit-pattern-expand");
+ DEFSYM (Qtreesit_invalid_predicate, "treesit-invalid-predicate");
DEFSYM (Qor, "or");
@@ -3531,6 +3894,10 @@ syms_of_treesit (void)
define_error (Qtreesit_parser_deleted,
"This parser is deleted and cannot be used",
Qtreesit_error);
+ define_error (Qtreesit_invalid_predicate,
+ "Invalid predicate, see `treesit-thing-settings' "
+ "for valid forms for a predicate",
+ Qtreesit_error);
DEFVAR_LISP ("treesit-load-name-override-list",
Vtreesit_load_name_override_list,
@@ -3561,6 +3928,33 @@ then in the `tree-sitter' subdirectory of `user-emacs-directory', and
then in the system default locations for dynamic libraries, in that order. */);
Vtreesit_extra_load_path = Qnil;
+ DEFVAR_LISP ("treesit-thing-settings",
+ Vtreesit_thing_settings,
+ doc:
+ /* A list defining things.
+
+The value should be an alist of (LANGUAGE . DEFINITIONS), where
+LANGUAGE is a language symbol, and DEFINITIONS is a list of
+
+ (THING PRED)
+
+THING is a symbol representing the thing, like `defun', `sexp', or
+`block'; PRED defines what kind of node can be qualified as THING.
+
+PRED can be a regexp string that matches the type of the node; it can
+be a predicate function that takes the node as the sole argument and
+returns t if the node is the thing; it can be a cons (REGEXP . FN),
+which is a combination of a regexp and a predicate function, and the
+node has to match both to qualify as the thing.
+
+PRED can also be recursively defined. It can be (or PRED...), meaning
+satisfying anyone of the inner PREDs qualifies the node; or (not
+PRED), meaning not satisfying the inner PRED qualifies the node.
+
+Finally, PRED can refer to other THINGs defined in this list by using
+the symbol of that THING. For example, (or block sexp). */);
+ Vtreesit_thing_settings = Qnil;
+
staticpro (&Vtreesit_str_libtree_sitter);
Vtreesit_str_libtree_sitter = build_pure_c_string ("libtree-sitter-");
staticpro (&Vtreesit_str_tree_sitter);
@@ -3648,6 +4042,7 @@ then in the system default locations for dynamic libraries, in that order. */);
defsubr (&Streesit_search_subtree);
defsubr (&Streesit_search_forward);
defsubr (&Streesit_induce_sparse_tree);
+ defsubr (&Streesit_node_match_p);
defsubr (&Streesit_subtree_stat);
#endif /* HAVE_TREE_SITTER */
defsubr (&Streesit_available_p);