summaryrefslogtreecommitdiff
path: root/lisp/progmodes/c-ts-mode.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/progmodes/c-ts-mode.el')
-rw-r--r--lisp/progmodes/c-ts-mode.el586
1 files changed, 586 insertions, 0 deletions
diff --git a/lisp/progmodes/c-ts-mode.el b/lisp/progmodes/c-ts-mode.el
new file mode 100644
index 00000000000..fc35d9aedda
--- /dev/null
+++ b/lisp/progmodes/c-ts-mode.el
@@ -0,0 +1,586 @@
+;;; c-ts-mode.el --- tree-sitter support for C and C++ -*- lexical-binding: t; -*-
+
+;; Copyright (C) 2022 Free Software Foundation, Inc.
+
+;; Author : Theodor Thornhill <theo@thornhill.no>
+;; Maintainer : Theodor Thornhill <theo@thornhill.no>
+;; Created : November 2022
+;; Keywords : c c++ cpp languages tree-sitter
+
+;; This file is part of GNU Emacs.
+
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+;;; Commentary:
+;;
+
+;;; Code:
+
+(require 'treesit)
+(require 'rx)
+
+(declare-function treesit-parser-create "treesit.c")
+(declare-function treesit-induce-sparse-tree "treesit.c")
+(declare-function treesit-node-parent "treesit.c")
+(declare-function treesit-node-start "treesit.c")
+(declare-function treesit-node-end "treesit.c")
+(declare-function treesit-node-child "treesit.c")
+(declare-function treesit-node-child-by-field-name "treesit.c")
+(declare-function treesit-node-type "treesit.c")
+
+(defcustom c-ts-mode-indent-offset 2
+ "Number of spaces for each indentation step in `c-ts-mode'."
+ :version "29.1"
+ :type 'integer
+ :safe 'integerp
+ :group 'c)
+
+(defcustom c-ts-mode-indent-style 'gnu
+ "Style used for indentation.
+
+The selected style could be one of GNU, K&R, LINUX or BSD. If
+one of the supplied styles doesn't suffice a function could be
+set instead. This function is expected return a list that
+follows the form of `treesit-simple-indent-rules'."
+ :version "29.1"
+ :type '(choice (symbol :tag "Gnu" 'gnu)
+ (symbol :tag "K&R" 'k&r)
+ (symbol :tag "Linux" 'linux)
+ (symbol :tag "BSD" 'bsd)
+ (function :tag "A function for user customized style" ignore))
+ :group 'c)
+
+(defvar c-ts-mode--syntax-table
+ (let ((table (make-syntax-table)))
+ ;; Taken from the cc-langs version
+ (modify-syntax-entry ?_ "_" table)
+ (modify-syntax-entry ?\\ "\\" table)
+ (modify-syntax-entry ?+ "." table)
+ (modify-syntax-entry ?- "." table)
+ (modify-syntax-entry ?= "." table)
+ (modify-syntax-entry ?% "." table)
+ (modify-syntax-entry ?< "." table)
+ (modify-syntax-entry ?> "." table)
+ (modify-syntax-entry ?& "." table)
+ (modify-syntax-entry ?| "." table)
+ (modify-syntax-entry ?\' "\"" table)
+ (modify-syntax-entry ?\240 "." table)
+ (modify-syntax-entry ?/ ". 124b" table)
+ (modify-syntax-entry ?* ". 23" table)
+ table)
+ "Syntax table for `c-ts-mode'.")
+
+(defun c-ts-mode--indent-styles (mode)
+ "Indent rules supported by `c-ts-mode'.
+MODE is either `c' or `cpp'."
+ (let ((common
+ `(((parent-is "translation_unit") parent-bol 0)
+ ((node-is ")") parent 1)
+ ((node-is "]") parent-bol 0)
+ ((node-is "}") (and parent parent-bol) 0)
+ ((node-is "else") parent-bol 0)
+ ((node-is "case") parent-bol 0)
+ ((node-is "preproc_arg") no-indent)
+ ((and (parent-is "comment") comment-end) comment-start -1)
+ ((parent-is "comment") comment-start-skip 0)
+ ((node-is "labeled_statement") parent-bol 0)
+ ((parent-is "labeled_statement") parent-bol c-ts-mode-indent-offset)
+ ((match "preproc_ifdef" "compound_statement") point-min 0)
+ ((match "#endif" "preproc_ifdef") point-min 0)
+ ((match "preproc_if" "compound_statement") point-min 0)
+ ((match "#endif" "preproc_if") point-min 0)
+ ((match "preproc_function_def" "compound_statement") point-min 0)
+ ((match "preproc_call" "compound_statement") point-min 0)
+ ((parent-is "compound_statement") (and parent parent-bol) c-ts-mode-indent-offset)
+ ((parent-is "function_definition") parent-bol 0)
+ ((parent-is "conditional_expression") first-sibling 0)
+ ((parent-is "assignment_expression") parent-bol c-ts-mode-indent-offset)
+ ((parent-is "comma_expression") first-sibling 0)
+ ((parent-is "init_declarator") parent-bol c-ts-mode-indent-offset)
+ ((parent-is "parenthesized_expression") first-sibling 1)
+ ((parent-is "argument_list") first-sibling 1)
+ ((parent-is "parameter_list") first-sibling 1)
+ ((parent-is "binary_expression") parent 0)
+ ((query "(for_statement initializer: (_) @indent)") parent-bol 5)
+ ((query "(for_statement condition: (_) @indent)") parent-bol 5)
+ ((query "(for_statement update: (_) @indent)") parent-bol 5)
+ ((query "(call_expression arguments: (_) @indent)") parent c-ts-mode-indent-offset)
+ ((parent-is "call_expression") parent 0)
+ ((parent-is "enumerator_list") parent-bol c-ts-mode-indent-offset)
+ ((parent-is "field_declaration_list") parent-bol c-ts-mode-indent-offset)
+ ((parent-is "initializer_list") parent-bol c-ts-mode-indent-offset)
+ ((parent-is "if_statement") parent-bol c-ts-mode-indent-offset)
+ ((parent-is "for_statement") parent-bol c-ts-mode-indent-offset)
+ ((parent-is "while_statement") parent-bol c-ts-mode-indent-offset)
+ ((parent-is "switch_statement") parent-bol c-ts-mode-indent-offset)
+ ((parent-is "case_statement") parent-bol c-ts-mode-indent-offset)
+ ((parent-is "do_statement") parent-bol c-ts-mode-indent-offset)
+ ,@(when (eq mode 'cpp)
+ `(((node-is "field_initializer_list") parent-bol ,(* c-ts-mode-indent-offset 2)))))))
+ `((gnu
+ ;; Prepend rules to set highest priority
+ ((match "while" "do_statement") parent 0)
+ ,@common)
+ (k&r ,@common)
+ (linux ,@common)
+ (bsd
+ ((parent-is "if_statement") parent-bol 0)
+ ((parent-is "for_statement") parent-bol 0)
+ ((parent-is "while_statement") parent-bol 0)
+ ((parent-is "switch_statement") parent-bol 0)
+ ((parent-is "case_statement") parent-bol 0)
+ ((parent-is "do_statement") parent-bol 0)
+ ,@common))))
+
+(defun c-ts-mode--set-indent-style (mode)
+ "Helper function to set indentation style.
+MODE is either `c' or `cpp'."
+ (let ((style
+ (if (functionp c-ts-mode-indent-style)
+ (funcall c-ts-mode-indent-style)
+ (pcase c-ts-mode-indent-style
+ ('gnu (alist-get 'gnu (c-ts-mode--indent-styles mode)))
+ ('k&r (alist-get 'k&r (c-ts-mode--indent-styles mode)))
+ ('bsd (alist-get 'bsd (c-ts-mode--indent-styles mode)))
+ ('linux (alist-get 'linux (c-ts-mode--indent-styles mode)))))))
+ `((,mode ,@style))))
+
+(defvar c-ts-mode--preproc-keywords
+ '("#define" "#if" "#ifdef" "#ifndef"
+ "#else" "#elif" "#endif" "#include")
+ "C/C++ keywords for tree-sitter font-locking.")
+
+(defun c-ts-mode--keywords (mode)
+ "C/C++ keywords for tree-sitter font-locking.
+MODE is either `c' or `cpp'."
+ (let ((c-keywords
+ '("break" "case" "const" "continue"
+ "default" "do" "else" "enum"
+ "extern" "for" "goto" "if"
+ "long" "register" "return" "short"
+ "signed" "sizeof" "static" "struct"
+ "switch" "typedef" "union" "unsigned"
+ "volatile" "while")))
+ (if (eq mode 'cpp)
+ (append c-keywords
+ '("and" "and_eq" "bitand" "bitor"
+ "catch" "class" "co_await" "co_return"
+ "co_yield" "compl" "concept" "consteval"
+ "constexpr" "constinit" "decltype" "delete"
+ "explicit" "final" "friend" "friend"
+ "mutable" "namespace" "new" "noexcept"
+ "not" "not_eq" "operator" "or"
+ "or_eq" "override" "private" "protected"
+ "public" "requires" "template" "throw"
+ "try" "typename" "using" "virtual"
+ "xor" "xor_eq"))
+ (append '("auto") c-keywords))))
+
+(defvar c-ts-mode--operators
+ '("=" "-" "*" "/" "+" "%" "~" "|" "&" "^" "<<" ">>" "->"
+ "." "<" "<=" ">=" ">" "==" "!=" "!" "&&" "||" "-="
+ "+=" "*=" "/=" "%=" "|=" "&=" "^=" ">>=" "<<=" "--" "++")
+ "C/C++ operators for tree-sitter font-locking.")
+
+(defun c-ts-mode--font-lock-settings (mode)
+ "Tree-sitter font-lock settings.
+MODE is either `c' or `cpp'."
+ (treesit-font-lock-rules
+ :language mode
+ :feature 'comment
+ `((comment) @font-lock-comment-face
+ (comment) @contextual)
+
+ :language mode
+ :feature 'preprocessor
+ `((preproc_directive) @font-lock-preprocessor-face
+
+ (preproc_def
+ name: (identifier) @font-lock-variable-name-face)
+
+ (preproc_ifdef
+ name: (identifier) @font-lock-variable-name-face)
+
+ (preproc_function_def
+ name: (identifier) @font-lock-function-name-face)
+
+ (preproc_params
+ (identifier) @font-lock-variable-name-face)
+
+ (preproc_defined) @font-lock-preprocessor-face
+ (preproc_defined (identifier) @font-lock-variable-name-face)
+ [,@c-ts-mode--preproc-keywords] @font-lock-preprocessor-face)
+
+ :language mode
+ :feature 'constant
+ `((true) @font-lock-constant-face
+ (false) @font-lock-constant-face
+ (null) @font-lock-constant-face
+ ,@(when (eq mode 'cpp)
+ '((this) @font-lock-constant-face)))
+
+ :language mode
+ :feature 'keyword
+ `([,@(c-ts-mode--keywords mode)] @font-lock-keyword-face
+ ,@(when (eq mode 'cpp)
+ '((auto) @font-lock-keyword-face)))
+
+ :language mode
+ :feature 'operator
+ `([,@c-ts-mode--operators] @font-lock-operator-face
+ "!" @font-lock-negation-char-face)
+
+ :language mode
+ :feature 'string
+ `((string_literal) @font-lock-string-face
+ (system_lib_string) @font-lock-string-face)
+
+ :language mode
+ :feature 'literal
+ `((number_literal) @font-lock-number-face
+ (char_literal) @font-lock-constant-face)
+
+ :language mode
+ :feature 'type
+ `((primitive_type) @font-lock-type-face
+ (type_identifier) @font-lock-type-face
+ (sized_type_specifier) @font-lock-type-face
+ ,@(when (eq mode 'cpp)
+ '((type_qualifier) @font-lock-type-face
+
+ (qualified_identifier
+ scope: (namespace_identifier) @font-lock-type-face)
+
+ (operator_cast) type: (type_identifier) @font-lock-type-face)))
+
+ :language mode
+ :feature 'definition
+ ;; Highlights identifiers in declarations.
+ `((declaration
+ declarator: (_) @c-ts-mode--fontify-declarator)
+
+ (field_declaration
+ declarator: (_) @c-ts-mode--fontify-declarator)
+
+ (function_definition
+ declarator: (_) @c-ts-mode--fontify-declarator))
+
+ ;; Should we highlight identifiers in the parameter list?
+ ;; (parameter_declaration
+ ;; declarator: (_) @c-ts-mode--fontify-declarator))
+
+ :language mode
+ :feature 'assignment
+ ;; TODO: Recursively highlight identifiers in parenthesized
+ ;; expressions, see `c-ts-mode--fontify-struct-declarator' for
+ ;; inspiration.
+ '((assignment_expression
+ left: (identifier) @font-lock-variable-name-face)
+ (assignment_expression
+ left: (field_expression field: (_) @font-lock-property-face))
+ (assignment_expression
+ left: (pointer_expression
+ (identifier) @font-lock-variable-name-face))
+ (assignment_expression
+ left: (subscript_expression
+ (identifier) @font-lock-variable-name-face))
+ (init_declarator declarator: (_) @c-ts-mode--fontify-declarator))
+
+ :language mode
+ :feature 'function
+ '((call_expression
+ function: (identifier) @font-lock-function-name-face))
+
+ :language mode
+ :feature 'variable
+ '((identifier) @c-ts-mode--fontify-variable)
+
+ :language mode
+ :feature 'label
+ '((labeled_statement
+ label: (statement_identifier) @font-lock-constant-face))
+
+ :language mode
+ :feature 'error
+ '((ERROR) @c-ts-fontify-error)
+
+ :feature 'escape-sequence
+ :language mode
+ :override t
+ '((escape_sequence) @font-lock-escape-face)
+
+ :language mode
+ :feature 'property
+ '((field_identifier) @font-lock-property-face
+ (enumerator
+ name: (identifier) @font-lock-property-face))
+
+ :language mode
+ :feature 'bracket
+ '((["(" ")" "[" "]" "{" "}"]) @font-lock-bracket-face)
+
+ :language mode
+ :feature 'delimiter
+ '((["," ":" ";"]) @font-lock-delimiter-face)
+
+ :language mode
+ :feature 'emacs-devel
+ :override t
+ '(((call_expression
+ (call_expression function: (identifier) @fn)
+ @c-ts-mode--fontify-defun)
+ (:match "^DEFUN$" @fn)))))
+
+(defun c-ts-mode--fontify-declarator (node override start end &rest args)
+ "Fontify a declarator (whatever under the \"declarator\" field).
+For NODE, OVERRIDE, START, END, and ARGS, see
+`treesit-font-lock-rules'."
+ (pcase (treesit-node-type node)
+ ((or "attributed_declarator" "parenthesized_declarator")
+ (apply #'c-ts-mode--fontify-declarator
+ (treesit-node-child node 0 t) override start end args))
+ ("pointer_declarator"
+ (apply #'c-ts-mode--fontify-declarator
+ (treesit-node-child node -1) override start end args))
+ ((or "function_declarator" "array_declarator" "init_declarator")
+ (apply #'c-ts-mode--fontify-declarator
+ (treesit-node-child-by-field-name node "declarator")
+ override start end args))
+ ((or "identifier" "field_identifier")
+ (treesit-fontify-with-override
+ (max (treesit-node-start node) start)
+ (min (treesit-node-end node) end)
+ (pcase (treesit-node-type (treesit-node-parent node))
+ ("function_declarator" 'font-lock-function-name-face)
+ (_ 'font-lock-variable-name-face))
+ override))))
+
+(defun c-ts-mode--fontify-variable (node override start end &rest _)
+ "Fontify an identifier node.
+Fontify it if NODE is not a function identifier. For NODE,
+OVERRIDE, START, END, and ARGS, see `treesit-font-lock-rules'."
+ (when (not (equal (treesit-node-type
+ (treesit-node-parent node))
+ "call_expression"))
+ (treesit-fontify-with-override
+ (max (treesit-node-start node) start)
+ (min (treesit-node-end node) end)
+ 'font-lock-variable-name-face
+ override)))
+
+(defun c-ts-mode--fontify-defun (node override start end &rest _)
+ "Correctly fontify the DEFUN macro.
+For NODE, OVERRIDE, START, and END, see
+`treesit-font-lock-rules'. The captured NODE is a
+call_expression where DEFUN is the function.
+
+This function corrects the fontification on the colon in
+\"doc:\", and the parameter list."
+ (let* ((parent (treesit-node-parent node))
+ ;; ARG-LIST-1 and 2 are like this:
+ ;;
+ ;; DEFUN (ARG-LIST-1)
+ ;; (ARG-LIST-2)
+ (arg-list-1 (treesit-node-children
+ (treesit-node-child-by-field-name
+ node "arguments")))
+ ;; ARG-LIST-2 is the
+ (arg-list-2 (treesit-node-children
+ (treesit-node-child-by-field-name
+ parent "arguments") t)))
+ ;; Fix the colon.
+ (dolist (node arg-list-1)
+ (when (equal (treesit-node-text node t) ":")
+ (treesit-fontify-with-override
+ (treesit-node-start node) (treesit-node-end node)
+ 'default override)))
+ ;; Fix the parameter list.
+ (while arg-list-2
+ (let ((type (and arg-list-2 (pop arg-list-2)))
+ (arg (and arg-list-2 (pop arg-list-2))))
+ (when type
+ (treesit-fontify-with-override
+ (max start (treesit-node-start type))
+ (min end (treesit-node-end type))
+ 'font-lock-type-face override))
+ (when arg
+ (treesit-fontify-with-override
+ (max start (treesit-node-start arg))
+ (min end (treesit-node-end arg))
+ 'default override))))))
+
+(defun c-ts-fontify-error (node override start end &rest _)
+ "Fontify the error nodes.
+For NODE, OVERRIDE, START, and END, see
+`treesit-font-lock-rules'."
+ (let ((parent (treesit-node-parent node))
+ (child (treesit-node-child node 0)))
+ (treesit-fontify-with-override
+ (max start (treesit-node-start node))
+ (min end (treesit-node-end node))
+ (cond
+ ;; This matches the case MACRO(struct a, b, c)
+ ;; where struct is seen as error.
+ ((and (equal (treesit-node-type child) "identifier")
+ (equal (treesit-node-type parent) "argument_list")
+ (member (treesit-node-text child)
+ '("struct" "long" "short" "enum" "union")))
+ 'font-lock-keyword-face)
+ (t 'font-lock-warning-face))
+ override)))
+
+(defun c-ts-mode--imenu-1 (node)
+ "Helper for `c-ts-mode--imenu'.
+Find string representation for NODE and set marker, then recurse
+the subtrees."
+ (let* ((ts-node (car node))
+ (subtrees (mapcan #'c-ts-mode--imenu-1 (cdr node)))
+ (name (when ts-node
+ (treesit-node-text
+ (pcase (treesit-node-type ts-node)
+ ("function_definition"
+ (treesit-node-child-by-field-name
+ (treesit-node-child-by-field-name
+ ts-node "declarator")
+ "declarator"))
+ ("declaration"
+ (let ((child (treesit-node-child ts-node -1 t)))
+ (pcase (treesit-node-type child)
+ ("identifier" child)
+ (_ (treesit-node-child-by-field-name
+ child "declarator")))))
+ ("struct_specifier"
+ (treesit-node-child-by-field-name
+ ts-node "name"))))))
+ (marker (when ts-node
+ (set-marker (make-marker)
+ (treesit-node-start ts-node)))))
+ (cond
+ ;; A struct_specifier could be inside a parameter list, another
+ ;; struct definition, a variable declaration, a function
+ ;; declaration. In those cases we don't include it.
+ ((string-match-p
+ (rx (or "parameter_declaration" "field_declaration"
+ "declaration" "function_definition"))
+ (or (treesit-node-type (treesit-node-parent ts-node))
+ ""))
+ nil)
+ ;; Ignore function local variable declarations.
+ ((and (equal (treesit-node-type ts-node) "declaration")
+ (not (equal (treesit-node-type (treesit-node-parent ts-node))
+ "translation_unit")))
+ nil)
+ ((or (null ts-node) (null name)) subtrees)
+ (subtrees
+ `((,name ,(cons name marker) ,@subtrees)))
+ (t
+ `((,name . ,marker))))))
+
+(defun c-ts-mode--imenu ()
+ "Return Imenu alist for the current buffer."
+ (let* ((node (treesit-buffer-root-node))
+ (func-tree (treesit-induce-sparse-tree
+ node "^function_definition$" nil 1000))
+ (var-tree (treesit-induce-sparse-tree
+ node "^declaration$" nil 1000))
+ (struct-tree (treesit-induce-sparse-tree
+ node "^struct_specifier$" nil 1000))
+ (func-index (c-ts-mode--imenu-1 func-tree))
+ (var-index (c-ts-mode--imenu-1 var-tree))
+ (struct-index (c-ts-mode--imenu-1 struct-tree)))
+ (append
+ (when struct-index `(("Struct" . ,struct-index)))
+ (when var-index `(("Variable" . ,var-index)))
+ (when func-index `(("Function" . ,func-index))))))
+
+;;;###autoload
+(define-derived-mode c-ts-mode--base-mode prog-mode "C"
+ "Major mode for editing C, powered by tree-sitter."
+ :syntax-table c-ts-mode--syntax-table
+
+ ;; Navigation.
+ (setq-local treesit-defun-type-regexp
+ (rx (or "specifier"
+ "definition")))
+
+ ;; Indent.
+ (when (eq c-ts-mode-indent-style 'linux)
+ (setq-local indent-tabs-mode t))
+
+ ;; Electric
+ (setq-local electric-indent-chars
+ (append "{}():;," electric-indent-chars))
+
+ ;; Imenu.
+ (setq-local imenu-create-index-function #'c-ts-mode--imenu)
+ (setq-local which-func-functions nil)
+
+ (setq-local treesit-font-lock-feature-list
+ '(( comment constant keyword literal preprocessor string)
+ ( assignment definition label property type)
+ ( delimiter error escape-sequence function
+ operator variable bracket))))
+
+;;;###autoload
+(define-derived-mode c-ts-mode c-ts-mode--base-mode "C"
+ "Major mode for editing C, powered by tree-sitter."
+ :group 'c
+
+ (unless (treesit-ready-p 'c)
+ (error "Tree-sitter for C isn't available"))
+
+ (treesit-parser-create 'c)
+
+ ;; Comments.
+ (setq-local comment-start "/* ")
+ (setq-local comment-start-skip "\\(?://+\\|/\\*+\\)\\s *")
+ (setq-local comment-end " */")
+ (setq-local treesit-comment-start (rx "/" (or (+ "/") (+ "*"))))
+ (setq-local treesit-comment-end (rx (+ (or "*")) "/"))
+
+ (setq-local treesit-simple-indent-rules
+ (c-ts-mode--set-indent-style 'c))
+
+ ;; Font-lock.
+ (setq-local treesit-font-lock-settings (c-ts-mode--font-lock-settings 'c))
+
+ (treesit-major-mode-setup))
+
+;;;###autoload
+(define-derived-mode c++-ts-mode c-ts-mode--base-mode "C++"
+ "Major mode for editing C++, powered by tree-sitter."
+ :group 'c++
+
+ (unless (treesit-ready-p 'cpp)
+ (error "Tree-sitter for C++ isn't available"))
+
+ ;; Comments.
+ (setq-local comment-start "// ")
+ (setq-local comment-start-skip "\\(?://+\\|/\\*+\\)\\s *")
+ (setq-local comment-end "")
+
+ (treesit-parser-create 'cpp)
+
+ (setq-local treesit-simple-indent-rules
+ (c-ts-mode--set-indent-style 'cpp))
+
+ ;; Font-lock.
+ (setq-local treesit-font-lock-settings (c-ts-mode--font-lock-settings 'cpp))
+
+ (treesit-major-mode-setup))
+
+(provide 'c-ts-mode)
+
+;;; c-ts-mode.el ends here