diff options
Diffstat (limited to 'lisp/progmodes/c-ts-mode.el')
-rw-r--r-- | lisp/progmodes/c-ts-mode.el | 237 |
1 files changed, 149 insertions, 88 deletions
diff --git a/lisp/progmodes/c-ts-mode.el b/lisp/progmodes/c-ts-mode.el index e69856baecc..3a89f0f494b 100644 --- a/lisp/progmodes/c-ts-mode.el +++ b/lisp/progmodes/c-ts-mode.el @@ -71,6 +71,8 @@ (eval-when-compile (require 'rx)) (declare-function treesit-parser-create "treesit.c") +(declare-function treesit-parser-root-node "treesit.c") +(declare-function treesit-parser-set-included-ranges "treesit.c") (declare-function treesit-node-parent "treesit.c") (declare-function treesit-node-start "treesit.c") (declare-function treesit-node-end "treesit.c") @@ -80,7 +82,6 @@ (declare-function treesit-node-prev-sibling "treesit.c") (declare-function treesit-node-first-child-for-pos "treesit.c") (declare-function treesit-node-next-sibling "treesit.c") -(declare-function treesit-parser-set-included-ranges "treesit.c") (declare-function treesit-query-compile "treesit.c") ;;; Custom variables @@ -96,7 +97,7 @@ "Toggle the comment style between block and line comments. Optional numeric ARG, if supplied, switches to block comment style when positive, to line comment style when negative, and -just toggles it when zero or left out." +just toggles it when zero or omitted." (interactive "P") (let ((prevstate-line (string= comment-start "// "))) (when (or (not arg) @@ -134,7 +135,7 @@ symbol." res) (let ((buffer (car buffers))) (with-current-buffer buffer - (if (derived-mode-p 'c-ts-mode 'c++-ts-mode) + (if (derived-mode-p '(c-ts-mode c++-ts-mode)) (loop (append res (list buffer)) (cdr buffers)) (loop res (cdr buffers)))))))) @@ -146,9 +147,9 @@ symbol." "Style used for indentation. The selected style could be one of GNU, K&R, LINUX or BSD. If -one of the supplied styles doesn't suffice, a function could be -set instead. This function is expected to return a list that -follows the form of `treesit-simple-indent-rules'." +one of the supplied styles doesn't suffice, the value could be +a function instead. This function is expected to return a list +that follows the form of `treesit-simple-indent-rules'." :version "29.1" :type '(choice (symbol :tag "Gnu" gnu) (symbol :tag "K&R" k&r) @@ -192,7 +193,7 @@ in this Emacs session." To set the default indent style globally, use `c-ts-mode-set-global-style'." (interactive (list (c-ts-mode--prompt-for-style))) - (if (not (derived-mode-p 'c-ts-mode 'c++-ts-mode)) + (if (not (derived-mode-p '(c-ts-mode c++-ts-mode))) (user-error "The current buffer is not in `c-ts-mode' nor `c++-ts-mode'") (setq-local c-ts-mode-indent-style style) (setq treesit-simple-indent-rules @@ -201,8 +202,8 @@ To set the default indent style globally, use (if (derived-mode-p 'c-ts-mode) 'c 'cpp)))))) (defcustom c-ts-mode-emacs-sources-support t - "Whether to enable Emacs source-specific features. -This enables detection of definitions of Lisp function using + "Whether to enable Emacs source-specific C features. +This enables detection of definitions of Lisp functions via the DEFUN macro. This needs to be set before enabling `c-ts-mode'; if you change the value after enabling `c-ts-mode', toggle the mode off and on @@ -242,7 +243,7 @@ again." < and > are usually punctuation, e.g., in ->. But when used for templates, they should be considered pairs. -This function checks for < and > in the changed RANGES and apply +This function checks for < and > in the changed RANGES and applies appropriate text property to alter the syntax of template delimiters < and >'s." (goto-char beg) @@ -283,9 +284,9 @@ is actually the parent of point at the moment of indentation." "Return the start of the previous named sibling of NODE. This anchor handles the special case where the previous sibling -is a labeled_statement, in that case, return the child of the +is a labeled_statement; in that case, return the child of the labeled statement instead. (Actually, recursively go down until -the node isn't a labeled_statement.) Eg, +the node isn't a labeled_statement.) E.g., label: int x = 1; @@ -294,10 +295,11 @@ label: The anchor of \"int y = 2;\" should be \"int x = 1;\" rather than the labeled_statement. -Return nil if a) there is no prev-sibling, or 2) prev-sibling +Return nil if a) there is no prev-sibling, or b) prev-sibling doesn't have a child. -PARENT and BOL are like other anchor functions." +PARENT is NODE's parent, BOL is the beginning of non-whitespace +characters of the current line." (when-let ((prev-sibling (or (treesit-node-prev-sibling node t) (treesit-node-prev-sibling @@ -335,7 +337,7 @@ PARENT and BOL are like other anchor functions." (defun c-ts-mode--standalone-parent-skip-preproc (_n parent &rest _) "Like the standalone-parent anchor but skips preproc nodes. -PARENT is the same as other anchor functions." +PARENT is the parent of the current node." (save-excursion (treesit-node-start (treesit-parent-until @@ -343,7 +345,7 @@ PARENT is the same as other anchor functions." ;; nil. parent (lambda (node) (and node - (not (string-match "preproc" (treesit-node-type node))) + (not (string-search "preproc" (treesit-node-type node))) (progn (goto-char (treesit-node-start node)) (looking-back (rx bol (* whitespace)) @@ -352,13 +354,15 @@ PARENT is the same as other anchor functions." (defun c-ts-mode--standalone-grandparent (_node parent bol &rest args) "Like the standalone-parent anchor but pass it the grandparent. -PARENT, BOL, ARGS are the same as other anchor functions." +PARENT is NODE's parent, BOL is the beginning of non-whitespace +characters of the current line." (apply (alist-get 'standalone-parent treesit-simple-indent-presets) parent (treesit-node-parent parent) bol args)) (defun c-ts-mode--else-heuristic (node parent bol &rest _) "Heuristic matcher for when \"else\" is followed by a closing bracket. -NODE, PARENT, and BOL are the same as in other matchers." +PARENT is NODE's parent, BOL is the beginning of non-whitespace +characters of the current line." (and (null node) (save-excursion (forward-line -1) @@ -534,6 +538,13 @@ NODE should be a labeled_statement. PARENT is its parent." ;;; Font-lock +(defvar c-ts-mode--feature-list + '(( comment definition) + ( keyword preprocessor string type) + ( assignment constant escape-sequence label literal) + ( bracket delimiter error function operator property variable)) + "`treesit-font-lock-feature-list' for `c-ts-mode'.") + (defvar c-ts-mode--preproc-keywords '("#define" "#if" "#ifdef" "#ifndef" "#else" "#elif" "#endif" "#include") @@ -749,7 +760,7 @@ MODE is either `c' or `cpp'." (defun c-ts-mode--declarator-identifier (node &optional qualified) "Return the identifier of the declarator node NODE. -If QUALIFIED is non-nil, include the names space part of the +If QUALIFIED is non-nil, include the namespace part of the identifier and return a qualified_identifier." (pcase (treesit-node-type node) ;; Recurse. @@ -774,7 +785,7 @@ identifier and return a qualified_identifier." node))) (defun c-ts-mode--fontify-declarator (node override start end &rest _args) - "Fontify a declarator (whatever under the \"declarator\" field). + "Fontify a declarator (whatever is under the \"declarator\" field). For NODE, OVERRIDE, START, END, and ARGS, see `treesit-font-lock-rules'." (let* ((identifier (c-ts-mode--declarator-identifier node)) @@ -809,7 +820,7 @@ For NODE, OVERRIDE, START, END, and ARGS, see (defun c-ts-mode--fontify-variable (node override start end &rest _) "Fontify an identifier node if it is a variable. -Don't fontify if it is a function identifier. For NODE, +Don't fontify it if it is a function identifier. For NODE, OVERRIDE, START, END, and ARGS, see `treesit-font-lock-rules'." (when (not (equal (treesit-node-type (treesit-node-parent node)) @@ -903,7 +914,8 @@ Return nil if NODE is not a defun node or doesn't have a name." t)) ((or "struct_specifier" "enum_specifier" "union_specifier" "class_specifier" - "namespace_definition") + "namespace_definition" + "preproc_def" "preproc_function_def") (treesit-node-child-by-field-name node "name")) ;; DEFUNs in Emacs sources. ("expression_statement" @@ -914,11 +926,22 @@ Return nil if NODE is not a defun node or doesn't have a name." name))) t)) +;;; Outline minor mode + +(defun c-ts-mode--outline-predicate (node) + "Match outlines on lines with function names." + (or (and (equal (treesit-node-type node) "function_declarator") + (equal (treesit-node-type (treesit-node-parent node)) + "function_definition")) + ;; DEFUNs in Emacs sources. + (and c-ts-mode-emacs-sources-support + (c-ts-mode--emacs-defun-p node)))) + ;;; Defun navigation (defun c-ts-mode--defun-valid-p (node) "Return non-nil if NODE is a valid defun node. -Ie, NODE is not nested." +That is, NODE is not nested." (let ((top-level-p (lambda (node) (not (treesit-node-top-level node (rx (or "function_definition" @@ -957,8 +980,7 @@ Basically, if NODE is a class, return non-nil; if NODE is a function but is under a class, return non-nil; if NODE is a top-level function, return nil. -This is for the Class subindex in -`treesit-simple-imenu-settings'." +This is for the Class subindex in `treesit-simple-imenu-settings'." (pcase (treesit-node-type node) ;; The Class subindex only has class_specifier and ;; function_definition. @@ -969,7 +991,7 @@ This is for the Class subindex in (defun c-ts-mode--defun-skipper () "Custom defun skipper for `c-ts-mode' and friends. -Structs in C ends with a semicolon, but the semicolon is not +Structs in C end with a semicolon, but the semicolon is not considered part of the struct node, so point would stop before the semicolon. This function skips the semicolon." (when (looking-at (rx (* (or " " "\t")) ";")) @@ -989,7 +1011,7 @@ the semicolon. This function skips the semicolon." (list node parent bol))) (defun c-ts-mode--emacs-defun-p (node) - "Return non-nil if NODE is a Lisp function defined using DEFUN. + "Return non-nil if NODE is a Lisp function defined via DEFUN. This function detects Lisp primitives defined in Emacs source files using the DEFUN macro." (and (equal (treesit-node-type node) "expression_statement") @@ -1010,15 +1032,15 @@ files using the DEFUN macro." "Return the defun node at point. In addition to regular C functions, this function recognizes -definitions of Lisp primitrives in Emacs source files using DEFUN, -if `c-ts-mode-emacs-sources-support' is non-nil. +definitions of Lisp primitrives in Emacs source files defined +via DEFUN, if `c-ts-mode-emacs-sources-support' is non-nil. Note that DEFUN is parsed by tree-sitter as two separate nodes, one for the declaration and one for the body; this function returns the declaration node. If RANGE is non-nil, return (BEG . END) where BEG end END -encloses the whole defun. This is for when the entire defun +enclose the whole defun. This is for when the entire defun is required, not just the declaration part for DEFUN." (when-let* ((node (treesit-defun-at-point)) (defun-range (cons (treesit-node-start node) @@ -1047,22 +1069,51 @@ is required, not just the declaration part for DEFUN." "Return the name of the current defun. This is used for `add-log-current-defun-function'. In addition to regular C functions, this function also recognizes -Emacs primitives defined using DEFUN in Emacs sources, +Emacs primitives defined via DEFUN in Emacs sources, if `c-ts-mode-emacs-sources-support' is non-nil." (or (treesit-add-log-current-defun) (c-ts-mode--defun-name (c-ts-mode--emacs-defun-at-point)))) +;;; Things + +(defvar c-ts-mode--thing-settings + `(;; It's more useful to include semicolons as sexp so + ;; that users can move to the end of a statement. + (sexp (not ,(rx (or "{" "}" "[" "]" "(" ")" ",")))) + ;; compound_statement makes us jump over too big units + ;; of code, so skip that one, and include the other + ;; statements. + (sentence + ,(regexp-opt '("preproc" + "declaration" + "specifier" + "attributed_statement" + "labeled_statement" + "expression_statement" + "if_statement" + "switch_statement" + "do_statement" + "while_statement" + "for_statement" + "return_statement" + "break_statement" + "continue_statement" + "goto_statement" + "case_statement"))) + (text ,(regexp-opt '("comment" + "raw_string_literal")))) + "`treesit-thing-settings' for both C and C++.") + ;;; Support for FOR_EACH_* macros ;; ;; FOR_EACH_TAIL, FOR_EACH_TAIL_SAFE, FOR_EACH_FRAME etc., followed by ;; an unbracketed body will mess up the parser, which parses the thing ;; as a function declaration. We "fix" it by adding a shadow parser -;; for a language 'emacs-c' (which is just 'c' but under a different -;; name). We use 'emacs-c' to find each FOR_EACH_* macro with a -;; unbracketed body, and set the ranges of the C parser so that it -;; skips those FOR_EACH_*'s. Note that we only ignore FOR_EACH_*'s -;; with a unbracketed body. Those with a bracketed body parse more -;; or less fine. +;; with the tag `for-each'. We use this parser to find each +;; FOR_EACH_* macro with a unbracketed body, and set the ranges of the +;; default C parser so that it skips those FOR_EACH_*'s. Note that we +;; only ignore FOR_EACH_*'s with a unbracketed body. Those with a +;; bracketed body parse more or less fine. ;; ;; In the meantime, we have a special fontification rule for ;; FOR_EACH_* macros with a bracketed body that removes any applied @@ -1083,12 +1134,12 @@ For BOL see `treesit-simple-indent-rules'." (defvar c-ts-mode--emacs-c-range-query (when (treesit-available-p) (treesit-query-compile - 'emacs-c `(((declaration - type: (macro_type_specifier - name: (identifier) @_name) - @for-each-tail) - (:match ,c-ts-mode--for-each-tail-regexp - @_name))))) + 'c `(((declaration + type: (macro_type_specifier + name: (identifier) @_name) + @for-each-tail) + (:match ,c-ts-mode--for-each-tail-regexp + @_name))))) "Query that finds a FOR_EACH_* macro with an unbracketed body.") (defvar-local c-ts-mode--for-each-tail-ranges nil @@ -1096,7 +1147,7 @@ For BOL see `treesit-simple-indent-rules'." (defun c-ts-mode--reverse-ranges (ranges beg end) "Reverse RANGES and return the new ranges between BEG and END. -Positions that were included RANGES are not in the returned +Positions that were included in RANGES are not in the returned ranges, and vice versa. Return nil if RANGES is nil. This way, passing the returned @@ -1118,9 +1169,11 @@ parser parse the whole buffer." "Set ranges for the C parser to skip some FOR_EACH_* macros. BEG and END are described in `treesit-range-rules'." (let* ((c-parser (treesit-parser-create 'c)) + (for-each-parser (treesit-parser-create 'c nil nil 'for-each)) (old-ranges c-ts-mode--for-each-tail-ranges) (new-ranges (treesit-query-range - 'emacs-c c-ts-mode--emacs-c-range-query beg end)) + (treesit-parser-root-node for-each-parser) + c-ts-mode--emacs-c-range-query beg end)) (set-ranges (treesit--clip-ranges (treesit--merge-ranges old-ranges new-ranges beg end) @@ -1140,7 +1193,6 @@ BEG and END are described in `treesit-range-rules'." "C-c C-c" #'comment-region "C-c C-k" #'c-ts-mode-toggle-comment-style) -;;;###autoload (define-derived-mode c-ts-base-mode prog-mode "C" "Major mode for editing C, powered by tree-sitter. @@ -1156,7 +1208,9 @@ BEG and END are described in `treesit-range-rules'." "enum_specifier" "union_specifier" "class_specifier" - "namespace_definition") + "namespace_definition" + "preproc_def" + "preproc_function_def") (and c-ts-mode-emacs-sources-support '(;; DEFUN. "expression_statement" @@ -1166,6 +1220,13 @@ BEG and END are described in `treesit-range-rules'." (setq-local treesit-defun-skipper #'c-ts-mode--defun-skipper) (setq-local treesit-defun-name-function #'c-ts-mode--defun-name) + ;; IMO it makes more sense to define what's NOT sexp, since sexp by + ;; spirit, especially when used for movement, is like "expression" + ;; or "syntax unit". --yuan + (setq-local treesit-thing-settings + `((c ,@c-ts-mode--thing-settings) + (cpp ,@c-ts-mode--thing-settings))) + ;; Nodes like struct/enum/union_specifier can appear in ;; function_definitions, so we need to find the top-level node. (setq-local treesit-defun-prefer-top-level t) @@ -1213,11 +1274,12 @@ BEG and END are described in `treesit-range-rules'." eos) c-ts-mode--defun-for-class-in-imenu-p nil)))) + ;; Outline minor mode + (setq-local treesit-outline-predicate + #'c-ts-mode--outline-predicate) + (setq-local treesit-font-lock-feature-list - '(( comment definition) - ( keyword preprocessor string type) - ( assignment constant escape-sequence label literal) - ( bracket delimiter error function operator property variable)))) + c-ts-mode--feature-list)) (defvar treesit-load-name-override-list) @@ -1227,7 +1289,7 @@ BEG and END are described in `treesit-range-rules'." This mode is independent from the classic cc-mode.el based `c-mode', so configuration variables of that mode, like -`c-basic-offset', doesn't affect this mode. +`c-basic-offset', don't affect this mode. To use tree-sitter C/C++ modes by default, evaluate @@ -1236,21 +1298,15 @@ To use tree-sitter C/C++ modes by default, evaluate (add-to-list \\='major-mode-remap-alist \\='(c-or-c++-mode . c-or-c++-ts-mode)) -in your configuration." +in your init files." :group 'c :after-hook (c-ts-mode-set-modeline) (when (treesit-ready-p 'c) - ;; Add a fake "emacs-c" language which is just C. Used for - ;; skipping FOR_EACH_* macros, see `c-ts-mode--emacs-set-ranges'. - (setf (alist-get 'emacs-c treesit-load-name-override-list) - '("libtree-sitter-c" "tree_sitter_c")) - ;; If Emacs source support is enabled, make sure emacs-c parser is - ;; after c parser in the parser list. This way various tree-sitter - ;; functions will automatically use the c parser rather than the - ;; emacs-c parser. + ;; Create an "for-each" parser, see `c-ts-mode--emacs-set-ranges' + ;; for more. (when c-ts-mode-emacs-sources-support - (treesit-parser-create 'emacs-c)) + (treesit-parser-create 'c nil nil 'for-each)) (treesit-parser-create 'c) ;; Comments. @@ -1277,6 +1333,8 @@ in your configuration." (lambda (_pos) 'c)) (treesit-font-lock-recompute-features '(emacs-devel))))) +(derived-mode-add-parents 'c-ts-mode '(c-mode)) + ;;;###autoload (define-derived-mode c++-ts-mode c-ts-base-mode "C++" "Major mode for editing C++, powered by tree-sitter. @@ -1292,7 +1350,7 @@ To use tree-sitter C/C++ modes by default, evaluate (add-to-list \\='major-mode-remap-alist \\='(c-or-c++-mode . c-or-c++-ts-mode)) -in your configuration. +in your init files. Since this mode uses a parser, unbalanced brackets might cause some breakage in indentation/fontification. Therefore, it's @@ -1301,13 +1359,17 @@ recommended to enable `electric-pair-mode' with this mode." :after-hook (c-ts-mode-set-modeline) (when (treesit-ready-p 'cpp) + (treesit-parser-create 'cpp) + ;; Syntax. (setq-local syntax-propertize-function #'c-ts-mode--syntax-propertize) + ;; Indent. (setq-local treesit-simple-indent-rules (c-ts-mode--get-indent-style 'cpp)) + ;; Font-lock. (setq-local treesit-font-lock-settings (c-ts-mode--font-lock-settings 'cpp)) (treesit-major-mode-setup) @@ -1316,6 +1378,8 @@ recommended to enable `electric-pair-mode' with this mode." (setq-local add-log-current-defun-function #'c-ts-mode--emacs-current-defun-name)))) +(derived-mode-add-parents 'c++-ts-mode '(c++-mode)) + (easy-menu-define c-ts-mode-menu (list c-ts-mode-map c++-ts-mode-map) "Menu for `c-ts-mode' and `c++-ts-mode'." '("C/C++" @@ -1361,7 +1425,7 @@ recommended to enable `electric-pair-mode' with this mode." "\\|" id "::" "\\|" id ws-maybe "=\\)" "\\|" "\\(?:inline" ws "\\)?namespace" - "\\(:?" ws "\\(?:" id "::\\)*" id "\\)?" ws-maybe "{" + "\\(?:" ws "\\(?:" id "::\\)*" id "\\)?" ws-maybe "{" "\\|" "class" ws id "\\(?:" ws "final" "\\)?" ws-maybe "[:{;\n]" "\\|" "struct" ws id "\\(?:" ws "final" ws-maybe "[:{\n]" @@ -1381,38 +1445,35 @@ matching on file name insufficient for detecting major mode that should be used. This function attempts to use file contents to determine whether -the code is C or C++ and based on that chooses whether to enable +the code is C or C++, and based on that chooses whether to enable `c-ts-mode' or `c++-ts-mode'." + (declare (obsolete c-or-c++-mode "30.1")) (interactive) - (if (save-excursion - (save-restriction - (save-match-data ; Why `save-match-data'? - (widen) - (goto-char (point-min)) - (re-search-forward c-ts-mode--c-or-c++-regexp nil t)))) - (c++-ts-mode) - (c-ts-mode))) + (let ((mode + (if (save-excursion + (save-restriction + (save-match-data ; Why `save-match-data'? + (widen) + (goto-char (point-min)) + (re-search-forward c-ts-mode--c-or-c++-regexp nil t)))) + 'c++-ts-mode + 'c-ts-mode))) + (funcall (major-mode-remap mode)))) + ;; The entries for C++ must come first to prevent *.c files be taken ;; as C++ on case-insensitive filesystems, since *.C files are C++, ;; not C. (if (treesit-ready-p 'cpp) - (add-to-list 'auto-mode-alist - '("\\(\\.ii\\|\\.\\(CC?\\|HH?\\)\\|\\.[ch]\\(pp\\|xx\\|\\+\\+\\)\\|\\.\\(cc\\|hh\\)\\)\\'" - . c++-ts-mode))) + (add-to-list 'major-mode-remap-defaults + '(c++-mode . c++-ts-mode))) (when (treesit-ready-p 'c) - (add-to-list 'auto-mode-alist - '("\\(\\.[chi]\\|\\.lex\\|\\.y\\(acc\\)?\\)\\'" . c-ts-mode)) - (add-to-list 'auto-mode-alist '("\\.x[pb]m\\'" . c-ts-mode)) - ;; image-mode's association must be before the C mode, otherwise XPM - ;; images will be initially visited as C files. Also note that the - ;; regexp must be different from what files.el does, or else - ;; add-to-list will not add the association where we want it. - (add-to-list 'auto-mode-alist '("\\.x[pb]m\\'" . image-mode))) - -(if (and (treesit-ready-p 'cpp) - (treesit-ready-p 'c)) - (add-to-list 'auto-mode-alist '("\\.h\\'" . c-or-c++-ts-mode))) + (add-to-list 'major-mode-remap-defaults '(c++-mode . c++-ts-mode)) + (add-to-list 'major-mode-remap-defaults '(c-mode . c-ts-mode))) + +(when (and (treesit-ready-p 'cpp) + (treesit-ready-p 'c)) + (add-to-list 'major-mode-remap-defaults '(c-or-c++-mode . c-or-c++-ts-mode))) (provide 'c-ts-mode) (provide 'c++-ts-mode) |