diff options
author | Stefan Kangas <stefankangas@gmail.com> | 2023-01-30 07:19:43 +0100 |
---|---|---|
committer | Stefan Kangas <stefankangas@gmail.com> | 2023-01-30 07:19:43 +0100 |
commit | 207a0d9408cb97b9ae78469e2487e3075ade03f8 (patch) | |
tree | a1981a27c6d4e0263cbaebfd36213a31c7744d7a | |
parent | 254c75fc2935e7edef079166d90b231278115a2f (diff) | |
parent | 8360e12f0ea3a3ccf0305adab3c7ea7e38af36c1 (diff) | |
download | emacs-207a0d9408cb97b9ae78469e2487e3075ade03f8.tar.gz |
Merge from origin/emacs-29
8360e12f0ea Update to Org 9.6.1-23-gc45a05
9058601308d ; * doc/emacs/misc.texi (Document View): Remove @footnote...
197f994384c Document tree-sitter features in the user manual
b73539832d9 ; Remove treesit--font-lock-fast-mode-grace-count
64fee21d5f8 Fix dockerfile-ts-mode line continuation indentation (bug...
1970726e26a Use treesit-subtree-stat to determine treesit--font-lock-...
382e018856a Add treesit-subtree-stat
a3003492ace Move c-ts-mode--statement-offset to c-ts-common.el
4b1714571c8 ; Fix byte-compile warnings in c-ts-mode.el
f50cb7d7c4b ; Improve docstring of c-ts-mode--indent-style-setter
1c3ca3bb649 Fix <> syntax in rust-ts-mode
56e8607dc99 Fix spurious errors on Windows when deleting temporary *....
2bd0b947538 Fix java class member without access modifier (bug#61115)
1de6ebf2878 Make treesit-font-lock-level a defcustom
6e50ee8bbb5 Add c-ts-mode-set-style and :set for c-ts-mode-indent-style
450db0587a9 Minor documentation improvements for outline-minor-mode (...
578e8926713 ; * doc/lispref/variables.texi (File Local Variables): Im...
bc5ee2b7bf0 ; * src/comp.c: Remove Local Variables section to avoid w...
362678d90e1 python.el: Use correct regexp when enabling python-ts-mode
76bb46db9df ; * doc/emacs/frames.texi (Mouse References): Improve ind...
# Conflicts:
# doc/emacs/programs.texi
# etc/NEWS
# lisp/progmodes/c-ts-mode.el
-rw-r--r-- | doc/emacs/display.texi | 131 | ||||
-rw-r--r-- | doc/emacs/emacs.texi | 4 | ||||
-rw-r--r-- | doc/emacs/files.texi | 11 | ||||
-rw-r--r-- | doc/emacs/frames.texi | 1 | ||||
-rw-r--r-- | doc/emacs/misc.texi | 6 | ||||
-rw-r--r-- | doc/emacs/programs.texi | 42 | ||||
-rw-r--r-- | doc/emacs/text.texi | 12 | ||||
-rw-r--r-- | doc/lispref/variables.texi | 1 | ||||
-rw-r--r-- | etc/NEWS.29 | 6 | ||||
-rw-r--r-- | lisp/emacs-lisp/comp.el | 7 | ||||
-rw-r--r-- | lisp/org/org-agenda.el | 8 | ||||
-rw-r--r-- | lisp/org/org-macs.el | 2 | ||||
-rw-r--r-- | lisp/org/org-version.el | 2 | ||||
-rw-r--r-- | lisp/org/org.el | 1 | ||||
-rw-r--r-- | lisp/org/ox.el | 4 | ||||
-rw-r--r-- | lisp/progmodes/c-ts-common.el | 118 | ||||
-rw-r--r-- | lisp/progmodes/c-ts-mode.el | 204 | ||||
-rw-r--r-- | lisp/progmodes/dockerfile-ts-mode.el | 20 | ||||
-rw-r--r-- | lisp/progmodes/java-ts-mode.el | 1 | ||||
-rw-r--r-- | lisp/progmodes/python.el | 4 | ||||
-rw-r--r-- | lisp/progmodes/rust-ts-mode.el | 26 | ||||
-rw-r--r-- | lisp/treesit.el | 107 | ||||
-rw-r--r-- | src/comp.c | 3 | ||||
-rw-r--r-- | src/treesit.c | 63 | ||||
-rw-r--r-- | test/lisp/progmodes/c-ts-mode-resources/indent.erts | 8 |
25 files changed, 566 insertions, 226 deletions
diff --git a/doc/emacs/display.texi b/doc/emacs/display.texi index f77ab569483..97732b65e32 100644 --- a/doc/emacs/display.texi +++ b/doc/emacs/display.texi @@ -1024,17 +1024,65 @@ customize-group @key{RET} font-lock-faces @key{RET}}. You can then use that customization buffer to customize the appearance of these faces. @xref{Face Customization}. +@cindex just-in-time (JIT) font-lock +@cindex background syntax highlighting + Fontifying very large buffers can take a long time. To avoid large +delays when a file is visited, Emacs initially fontifies only the +visible portion of a buffer. As you scroll through the buffer, each +portion that becomes visible is fontified as soon as it is displayed; +this type of Font Lock is called @dfn{Just-In-Time} (or @dfn{JIT}) +Lock. You can control how JIT Lock behaves, including telling it to +perform fontification while idle, by customizing variables in the +customization group @samp{jit-lock}. @xref{Specific Customization}. + + The information that major modes use for determining which parts of +buffer text to fontify and what faces to use can be based on several +different ways of analyzing the text: + +@itemize @bullet +@item +Search for keywords and other textual patterns based on regular +expressions (@pxref{Regexp Search,, Regular Expression Search}). + +@item +Find syntactically distinct parts of text based on built-in syntax +tables (@pxref{Syntax Tables,,, elisp, The Emacs Lisp Reference +Manual}). + +@item +Use a syntax tree produced by a full-blown parser, via a special-purpose +library, such as the tree-sitter library (@pxref{Parsing Program +Source,,, elisp, The Emacs Lisp Reference Manual}), or an external +program. +@end itemize + +@menu +* Traditional Font Lock:: Font Lock based on regexps and syntax tables. +* Parser-based Font Lock:: Font Lock based on external parser. 
+@end menu + +@node Traditional Font Lock +@subsection Traditional Font Lock +@cindex traditional font-lock + + ``Traditional'' methods of providing font-lock information are based +on regular-expression search and on syntactic analysis using syntax +tables built into Emacs. This subsection describes the use and +customization of font-lock for major modes which use these traditional +methods. + @vindex font-lock-maximum-decoration - You can customize the variable @code{font-lock-maximum-decoration} -to alter the amount of fontification applied by Font Lock mode, for -major modes that support this feature. The value should be a number -(with 1 representing a minimal amount of fontification; some modes -support levels as high as 3); or @code{t}, meaning ``as high as -possible'' (the default). To be effective for a given file buffer, -the customization of @code{font-lock-maximum-decoration} should be -done @emph{before} the file is visited; if you already have the file -visited in a buffer when you customize this variable, kill the buffer -and visit the file again after the customization. + You can control the amount of fontification applied by Font Lock +mode by customizing the variable @code{font-lock-maximum-decoration}, +for major modes that support this feature. The value of this variable +should be a number (with 1 representing a minimal amount of +fontification; some modes support levels as high as 3); or @code{t}, +meaning ``as high as possible'' (the default). To be effective for a +given file buffer, the customization of +@code{font-lock-maximum-decoration} should be done @emph{before} the +file is visited; if you already have the file visited in a buffer when +you customize this variable, kill the buffer and visit the file again +after the customization. 
You can also specify different numbers for particular major modes; for example, to use level 1 for C/C++ modes, and the default level @@ -1082,16 +1130,59 @@ keywords by customizing the @code{font-lock-ignore} option, @pxref{Customizing Keywords,,, elisp, The Emacs Lisp Reference Manual}. -@cindex just-in-time (JIT) font-lock -@cindex background syntax highlighting - Fontifying large buffers can take a long time. To avoid large -delays when a file is visited, Emacs initially fontifies only the -visible portion of a buffer. As you scroll through the buffer, each -portion that becomes visible is fontified as soon as it is displayed; -this type of Font Lock is called @dfn{Just-In-Time} (or @dfn{JIT}) -Lock. You can control how JIT Lock behaves, including telling it to -perform fontification while idle, by customizing variables in the -customization group @samp{jit-lock}. @xref{Specific Customization}. +@node Parser-based Font Lock +@subsection Parser-based Font Lock +@cindex font-lock via tree-sitter +@cindex parser-based font-lock + If your Emacs was built with the tree-sitter library, it can use the +results of parsing the buffer text by that library for the purposes of +fontification. This is usually faster and more accurate than the +``traditional'' methods described in the previous subsection, since +the tree-sitter library provides full-blown parsers for programming +languages and other kinds of formatted text which it supports. Major +modes which utilize the tree-sitter library are named +@code{@var{foo}-ts-mode}, with the @samp{-ts-} part indicating the use +of the library. This subsection documents the Font Lock support based +on the tree-sitter library. + +@vindex treesit-font-lock-level + You can control the amount of fontification applied by Font Lock +mode of major modes based on tree-sitter by customizing the variable +@code{treesit-font-lock-level}. 
Its value is a number between 1 and +4: + +@table @asis +@item Level 1 +This level usually fontifies only comments and function names in +function definitions. +@item Level 2 +This level adds fontification of keywords, strings, and data types. +@item Level 3 +This is the default level; it adds fontification of assignments, +numbers, properties, etc. +@item Level 4 +This level adds everything else that can be fontified: operators, +delimiters, brackets, other punctuation, function names in function +calls, variables, etc. +@end table + +@vindex treesit-font-lock-feature-list +@noindent +What exactly constitutes each of the syntactical categories mentioned +above depends on the major mode and the parser grammar used by +tree-sitter for the major-mode's language. However, in general the +categories follow the conventions of the programming language or the +file format supported by the major mode. The buffer-local value of +the variable @code{treesit-font-lock-feature-list} holds the +fontification features supported by a tree-sitter based major mode, +where each sub-list shows the features provided by the corresponding +fontification level. + + Once you change the value of @code{treesit-font-lock-level} via +@w{@kbd{M-x customize-variable}} (@pxref{Specific Customization}), it +takes effect immediately in all the existing buffers and for files you +visit in the future in the same session. + @node Highlight Interactively @section Interactive Highlighting diff --git a/doc/emacs/emacs.texi b/doc/emacs/emacs.texi index b6d149eb3ef..7071ea44edd 100644 --- a/doc/emacs/emacs.texi +++ b/doc/emacs/emacs.texi @@ -383,6 +383,10 @@ Controlling the Display * Visual Line Mode:: Word wrap and screen line-based editing. * Display Custom:: Information on variables for customizing display. +Font Lock +* Traditional Font Lock:: Font Lock based on regexps and syntax tables. +* Parser-based Font Lock:: Font Lock based on external parser. 
+ Searching and Replacement * Incremental Search:: Search happens as you type the string. diff --git a/doc/emacs/files.texi b/doc/emacs/files.texi index 42e252c417b..664b9d5d9a3 100644 --- a/doc/emacs/files.texi +++ b/doc/emacs/files.texi @@ -215,6 +215,17 @@ by the integers that Emacs can represent (@pxref{Buffers}). If you try, Emacs displays an error message saying that the maximum buffer size has been exceeded. +@vindex treesit-max-buffer-size + If you try to visit a file whose major mode (@pxref{Major Modes}) +uses the tree-sitter parsing library, Emacs will display a warning if +the file's size in bytes is larger than the value of the variable +@code{treesit-max-buffer-size}. The default value is 40 megabytes for +64-bit Emacs and 15 megabytes for 32-bit Emacs. This avoids the +danger of having Emacs run out of memory by preventing the activation +of major modes based on tree-sitter in such large buffers, because a +typical tree-sitter parser needs about 10 times as much memory as the +text it parses. + @cindex wildcard characters in file names @vindex find-file-wildcards If the file name you specify contains shell-style wildcard diff --git a/doc/emacs/frames.texi b/doc/emacs/frames.texi index 3ee6eb59dbb..ce631561be7 100644 --- a/doc/emacs/frames.texi +++ b/doc/emacs/frames.texi @@ -334,6 +334,7 @@ In this way, you can use the mouse to move point over a button without activating it. Dragging the mouse over or onto a button has its usual behavior of setting the region, and does not activate the button. +@vindex mouse-1-click-follows-link You can change how @kbd{mouse-1} applies to buttons by customizing the variable @code{mouse-1-click-follows-link}. If the value is a positive integer, that determines how long you need to hold the mouse diff --git a/doc/emacs/misc.texi b/doc/emacs/misc.texi index 3ee8ee5ee39..a5e1689b6c7 100644 --- a/doc/emacs/misc.texi +++ b/doc/emacs/misc.texi @@ -470,11 +470,7 @@ documents. 
It provides features such as slicing, zooming, and searching inside documents. It works by converting the document to a set of images using the @command{gs} (GhostScript) or @command{pdfdraw}/@command{mutool draw} (MuPDF) commands and other -external tools @footnote{PostScript files require GhostScript, DVI -files require @code{dvipdf} or @code{dvipdfm}, OpenDocument and -Microsoft Office documents require the @code{unoconv} tool, and EPUB, -CBZ, FB2, XPS and OXPS files require @code{mutool} to be available.}, -and displaying those images. +external tools, and then displays those converted images. @findex doc-view-toggle-display @findex doc-view-minor-mode diff --git a/doc/emacs/programs.texi b/doc/emacs/programs.texi index 065ed1c51f7..d983c2b59c6 100644 --- a/doc/emacs/programs.texi +++ b/doc/emacs/programs.texi @@ -255,6 +255,17 @@ they do their standard jobs in a way better fitting a particular language. Other major modes may replace any or all of these key bindings for that purpose. +@cindex nested defuns +@vindex treesit-defun-tactic + Some programming languages support @dfn{nested defuns}, whereby a +defun (such as a function or a method or a class) can be defined +inside (i.e., as part of the body) of another defun. The commands +described above by default find the beginning and the end of the +@emph{innermost} defun around point. Major modes based on the +tree-sitter library provide control of this behavior: if the variable +@code{treesit-defun-tactic} is set to the value @code{top-level}, the +defun commands will find the @emph{outermost} defuns instead. + @node Moving by Sentences @subsection Moving by Sentences @cindex sentences, in programming languages @@ -599,15 +610,19 @@ then indent it like this: @item C-c C-q @kindex C-c C-q @r{(C mode)} @findex c-indent-defun +@findex c-ts-mode-indent-defun Reindent the current top-level function definition or aggregate type -declaration (@code{c-indent-defun}). 
+declaration (@code{c-indent-defun} in CC mode, +@code{c-ts-mode-indent-defun} in @code{c-ts-mode} based on tree-sitter). @item C-M-q @kindex C-M-q @r{(C mode)} @findex c-indent-exp -Reindent each line in the balanced expression that follows point -(@code{c-indent-exp}). A prefix argument inhibits warning messages -about invalid syntax. +@findex prog-indent-sexp +Reindent each line in the balanced expression that follows point. In +CC mode, this invokes @code{c-indent-exp}; in tree-sitter based +@code{c-ts-mode} this invokes a more general @code{prog-indent-sexp}. +A prefix argument inhibits warning messages about invalid syntax. @item @key{TAB} @findex c-indent-line-or-region @@ -647,7 +662,8 @@ onto the indentation of the @dfn{anchor statement}. @table @kbd @item C-c . @var{style} @key{RET} -Select a predefined style @var{style} (@code{c-set-style}). +Select a predefined style @var{style} (@code{c-set-style} in CC mode, +@code{c-ts-mode-set-style} in @code{c-ts-mode} based on tree-sitter). @end table A @dfn{style} is a named collection of customizations that can be @@ -663,6 +679,7 @@ typing @kbd{C-M-q} at the start of a function definition. @kindex C-c . @r{(C mode)} @findex c-set-style +@findex c-ts-mode-set-style To choose a style for the current buffer, use the command @w{@kbd{C-c .}}. Specify a style name as an argument (case is not significant). This command affects the current buffer only, and it affects only @@ -671,11 +688,11 @@ the code already in the buffer. To reindent the whole buffer in the new style, you can type @kbd{C-x h C-M-\}. @vindex c-default-style - You can also set the variable @code{c-default-style} to specify the -default style for various major modes. Its value should be either the -style's name (a string) or an alist, in which each element specifies -one major mode and which indentation style to use for it. 
For -example, + When using CC mode, you can also set the variable +@code{c-default-style} to specify the default style for various major +modes. Its value should be either the style's name (a string) or an +alist, in which each element specifies one major mode and which +indentation style to use for it. For example, @example (setq c-default-style @@ -692,6 +709,11 @@ one of the C-like major modes; thus, if you specify a new default style for Java mode, you can make it take effect in an existing Java mode buffer by typing @kbd{M-x java-mode} there. +@vindex c-ts-mode-indent-style + When using the tree-sitter based @code{c-ts-mode}, you can set the +default indentation style by customizing the variable +@code{c-ts-mode-indent-style}. + The @code{gnu} style specifies the formatting recommended by the GNU Project for C; it is the default, so as to encourage use of our recommended style. diff --git a/doc/emacs/text.texi b/doc/emacs/text.texi index 78e89d8031a..18f2274cfa6 100644 --- a/doc/emacs/text.texi +++ b/doc/emacs/text.texi @@ -1021,14 +1021,16 @@ this variable is @code{insert}, the buttons are inserted directly into the buffer text, so @key{RET} on the button will also toggle display of the section, like a mouse click does. If the value is @code{in-margins}, Outline minor mode will use the window margins to -indicate that a section is hidden. +indicate that a section is hidden. The buttons are customizable as icons +(@pxref{Icons}). @vindex outline-minor-mode-cycle If the @code{outline-minor-mode-cycle} user option is -non-@code{nil}, the @kbd{TAB} and @kbd{S-@key{TAB}} keys are enabled on the -outline heading lines. @kbd{TAB} cycles hiding, showing the -sub-heading, and showing all for the current section. @kbd{S-@key{TAB}} -does the same for the entire buffer. +non-@code{nil}, the @kbd{TAB} and @kbd{S-@key{TAB}} keys that cycle +the visibility are enabled on the outline heading lines +(@pxref{Outline Visibility, outline-cycle}). 
@kbd{TAB} cycles hiding, +showing the sub-heading, and showing all for the current section. +@kbd{S-@key{TAB}} does the same for the entire buffer. @node Outline Format @subsection Format of Outlines diff --git a/doc/lispref/variables.texi b/doc/lispref/variables.texi index 39d0906f6c4..5584cbce9a6 100644 --- a/doc/lispref/variables.texi +++ b/doc/lispref/variables.texi @@ -2023,6 +2023,7 @@ file-local variables stored in @code{file-local-variables-alist}. @end defvar @cindex safe local variable +@cindex @code{safe-local-variable}, property of variable You can specify safe values for a variable with a @code{safe-local-variable} property. The property has to be a function of one argument; any value is safe if the function returns diff --git a/etc/NEWS.29 b/etc/NEWS.29 index 4d199676848..fb211f9b7d0 100644 --- a/etc/NEWS.29 +++ b/etc/NEWS.29 @@ -1018,6 +1018,8 @@ quotes removed. --- *** 'M-x apropos-variable' output now includes values of variables. +Such apropos buffer is more easily viewed with outlining after +enabling 'outline-minor-mode' in 'apropos-mode'. +++ *** New docstring syntax to indicate that symbols shouldn't be links. @@ -2480,6 +2482,10 @@ matches. --- *** New function 'xref-show-xrefs'. +*** 'outline-minor-mode' is supported in Xref buffers. +You can enable outlining by adding 'outline-minor-mode' to +'xref-after-update-hook'. + ** File Notifications +++ diff --git a/lisp/emacs-lisp/comp.el b/lisp/emacs-lisp/comp.el index 4c423be06c4..d2e7d933f4f 100644 --- a/lisp/emacs-lisp/comp.el +++ b/lisp/emacs-lisp/comp.el @@ -4112,13 +4112,16 @@ the deferred compilation mechanism." (native-elisp-load data))) ;; We may have created a temporary file when we're being ;; called with something other than a file as the argument. - ;; Delete it. + ;; Delete it if we can. 
(when (and (not (stringp function-or-file)) (not output) comp-ctxt (comp-ctxt-output comp-ctxt) (file-exists-p (comp-ctxt-output comp-ctxt))) - (delete-file (comp-ctxt-output comp-ctxt)))))))) + (cond ((eq 'windows-nt system-type) + ;; We may still be using the temporary .eln file. + (ignore-errors (delete-file (comp-ctxt-output comp-ctxt)))) + (t (delete-file (comp-ctxt-output comp-ctxt)))))))))) (defun native-compile-async-skip-p (file load selector) "Return non-nil if FILE's compilation should be skipped. diff --git a/lisp/org/org-agenda.el b/lisp/org/org-agenda.el index 2d194ad3413..63107e8e6a4 100644 --- a/lisp/org/org-agenda.el +++ b/lisp/org/org-agenda.el @@ -3474,13 +3474,17 @@ This ensures the export commands can easily use it." (when (setq tmp (plist-get props 'date)) (when (integerp tmp) (setq tmp (calendar-gregorian-from-absolute tmp))) (let ((calendar-date-display-form - '(year "-" (string-pad month 2 ?0 'left) "-" (string-pad day 2 ?0 'left)))) + '((format "%s-%.2d-%.2d" year + (string-to-number month) + (string-to-number day))))) (setq tmp (calendar-date-string tmp))) (setq props (plist-put props 'date tmp))) (when (setq tmp (plist-get props 'day)) (when (integerp tmp) (setq tmp (calendar-gregorian-from-absolute tmp))) (let ((calendar-date-display-form - '(year "-" (string-pad month 2 ?0 'left) "-" (string-pad day 2 ?0 'left)))) + '((format "%s-%.2d-%.2d" year + (string-to-number month) + (string-to-number day))))) (setq tmp (calendar-date-string tmp))) (setq props (plist-put props 'day tmp)) (setq props (plist-put props 'agenda-day tmp))) diff --git a/lisp/org/org-macs.el b/lisp/org/org-macs.el index 07c668a807d..8d7b0b034f8 100644 --- a/lisp/org/org-macs.el +++ b/lisp/org/org-macs.el @@ -46,7 +46,7 @@ ;; `org-git-version' check because the generated Org version strings ;; will not match. `(unless (equal (org-release) ,(org-release)) - (warn "Org version mismatch. 
Make sure that correct `load-path' is set early in init.el + (warn "Org version mismatch. Org loading aborted. This warning usually appears when a built-in Org version is loaded prior to the more recent Org version. diff --git a/lisp/org/org-version.el b/lisp/org/org-version.el index 22f952d7a30..8372a0be4a5 100644 --- a/lisp/org/org-version.el +++ b/lisp/org/org-version.el @@ -11,7 +11,7 @@ Inserted by installing Org mode or when a release is made." (defun org-git-version () "The Git version of Org mode. Inserted by installing Org or when a release is made." - (let ((org-git-version "release_9.6.1-16-ge37e9b")) + (let ((org-git-version "release_9.6.1-23-gc45a05")) org-git-version)) (provide 'org-version) diff --git a/lisp/org/org.el b/lisp/org/org.el index 153e860f9a5..1b829d837c7 100644 --- a/lisp/org/org.el +++ b/lisp/org/org.el @@ -8608,6 +8608,7 @@ or to another Org file, automatically push the old position onto the ring." (defvar org-agenda-buffer-name) (defun org-follow-timestamp-link () "Open an agenda view for the time-stamp date/range at point." + (require 'org-agenda) ;; Avoid changing the global value. (let ((org-agenda-buffer-name org-agenda-buffer-name)) (cond diff --git a/lisp/org/ox.el b/lisp/org/ox.el index 65f9ff18279..6f819def93a 100644 --- a/lisp/org/ox.el +++ b/lisp/org/ox.el @@ -6600,14 +6600,14 @@ see. Optional argument POST-PROCESS is a function which should accept no argument. It is always called within the current process, from BUFFER, with point at its beginning. Export back-ends can -use it to set a major mode there, e.g, +use it to set a major mode there, e.g., (defun org-latex-export-as-latex (&optional async subtreep visible-only body-only ext-plist) (interactive) (org-export-to-buffer \\='latex \"*Org LATEX Export*\" async subtreep visible-only body-only ext-plist - #'LaTeX-mode)) + #\\='LaTeX-mode)) When expressed as an anonymous function, using `lambda', POST-PROCESS needs to be quoted. 
diff --git a/lisp/progmodes/c-ts-common.el b/lisp/progmodes/c-ts-common.el index 6671d4be5b6..2d4a0d41c2a 100644 --- a/lisp/progmodes/c-ts-common.el +++ b/lisp/progmodes/c-ts-common.el @@ -2,7 +2,7 @@ ;; Copyright (C) 2023 Free Software Foundation, Inc. -;; Author : 付禹安 (Yuan Fu) <casouri@gmail.com> +;; Maintainer : 付禹安 (Yuan Fu) <casouri@gmail.com> ;; Keywords : c c++ java javascript rust languages tree-sitter ;; This file is part of GNU Emacs. @@ -22,7 +22,10 @@ ;;; Commentary: ;; -;; For C-like language major modes: +;; This file contains functions that can be shared by C-like language +;; major modes, like indenting and filling "/* */" block comments. +;; +;; For indenting and filling comments: ;; ;; - Use `c-ts-common-comment-setup' to setup comment variables and ;; filling. @@ -30,6 +33,14 @@ ;; - Use simple-indent matcher `c-ts-common-looking-at-star' and ;; anchor `c-ts-common-comment-start-after-first-star' for indenting ;; block comments. See `c-ts-mode--indent-styles' for example. +;; +;; For indenting statements: +;; +;; - Set `c-ts-common-indent-offset', +;; `c-ts-common-indent-block-type-regexp', and +;; `c-ts-common-indent-bracketless-type-regexp', then use simple-indent +;; offset `c-ts-common-statement-offset' in +;; `treesit-simple-indent-rules'. ;;; Code: @@ -40,6 +51,8 @@ (declare-function treesit-node-end "treesit.c") (declare-function treesit-node-type "treesit.c") +;;; Comment indentation and filling + (defun c-ts-common-looking-at-star (_n _p bol &rest _) "A tree-sitter simple indent matcher. Matches if there is a \"*\" after BOL." @@ -242,6 +255,107 @@ Set up: (setq-local paragraph-separate paragraph-start) (setq-local fill-paragraph-function #'c-ts-common--fill-paragraph)) +;;; Statement indent + +(defvar c-ts-common-indent-offset nil + "Indent offset used by `c-ts-common' indent functions. + +This should be the symbol of the indent offset variable for the +particular major mode. 
This cannot be nil for `c-ts-common' +statement indent functions to work.") + +(defvar c-ts-common-indent-block-type-regexp nil + "Regexp matching types of block nodes (i.e., {} blocks). + +This cannot be nil for `c-ts-common' statement indent functions +to work.") + +(defvar c-ts-common-indent-bracketless-type-regexp nil + "A regexp matching types of bracketless constructs. + +These constructs include if, while, do-while, for statements. In +these statements, the body can omit the bracket, which requires +special handling from our bracket-counting indent algorithm. + +This can be nil, meaning such special handling is not needed.") + +(defun c-ts-common-statement-offset (node parent &rest _) + "This anchor is used for children of a statement inside a block. + +This function basically counts the number of block nodes (i.e., +brackets) (defined by `c-ts-mode--indent-block-type-regexp') +between NODE and the root node (not counting NODE itself), and +multiply that by `c-ts-common-indent-offset'. + +To support GNU style, on each block level, this function also +checks whether the opening bracket { is on its own line, if so, +it adds an extra level, except for the top-level. + +PARENT is NODE's parent." + (let ((level 0)) + ;; If point is on an empty line, NODE would be nil, but we pretend + ;; there is a statement node. + (when (null node) + (setq node t)) + ;; If NODE is a opening bracket on its own line, take off one + ;; level because the code below assumes NODE is a statement + ;; _inside_ a {} block. + (when (string-match-p c-ts-common-indent-block-type-regexp + (treesit-node-type node)) + (cl-decf level)) + ;; Go up the tree and compute indent level. 
+ (while (if (eq node t) + (setq node parent) + node) + (when (string-match-p c-ts-common-indent-block-type-regexp + (treesit-node-type node)) + (cl-incf level) + (save-excursion + (goto-char (treesit-node-start node)) + ;; Add an extra level if the opening bracket is on its own + ;; line, except (1) it's at top-level, or (2) it's immediate + ;; parent is another block. + (cond ((bolp) nil) ; Case (1). + ((let ((parent-type (treesit-node-type + (treesit-node-parent node)))) + ;; Case (2). + (and parent-type + (or (string-match-p + c-ts-common-indent-block-type-regexp + parent-type)))) + nil) + ;; Add a level. + ((looking-back (rx bol (* whitespace)) + (line-beginning-position)) + (cl-incf level))))) + (setq level (c-ts-mode--fix-bracketless-indent level node)) + ;; Go up the tree. + (setq node (treesit-node-parent node))) + (* level (symbol-value c-ts-common-indent-offset)))) + +(defun c-ts-mode--fix-bracketless-indent (level node) + "Takes LEVEL and NODE and return adjusted LEVEL. +This fixes indentation for cases shown in bug#61026. Basically +in C-like syntax, statements like if, for, while sometimes omit +the bracket in the body." + (let ((block-re c-ts-common-indent-block-type-regexp) + (statement-re + c-ts-common-indent-bracketless-type-regexp) + (node-type (treesit-node-type node)) + (parent-type (treesit-node-type (treesit-node-parent node)))) + (if (and block-re statement-re node-type parent-type + (not (string-match-p block-re node-type)) + (string-match-p statement-re parent-type)) + (1+ level) + level))) + +(defun c-ts-mode--close-bracket-offset (node parent &rest _) + "Offset for the closing bracket, NODE. +It's basically one level less that the statements in the block. +PARENT is NODE's parent." 
+ (- (c-ts-common-statement-offset node parent) + (symbol-value c-ts-common-indent-offset))) + (provide 'c-ts-common) ;;; c-ts-common.el ends here diff --git a/lisp/progmodes/c-ts-mode.el b/lisp/progmodes/c-ts-mode.el index 76ac92ed82d..8e9852ed4ee 100644 --- a/lisp/progmodes/c-ts-mode.el +++ b/lisp/progmodes/c-ts-mode.el @@ -63,11 +63,6 @@ ;; will set up Emacs to use the C/C++ modes defined here for other ;; files, provided that you have the corresponding parser grammar ;; libraries installed. -;; -;; - Use variable `c-ts-mode-indent-block-type-regexp' with indent -;; offset c-ts-mode--statement-offset for indenting statements. -;; Again, see `c-ts-mode--indent-styles' for example. -;; ;;; Code: @@ -92,6 +87,28 @@ :safe 'integerp :group 'c) +(defun c-ts-mode--indent-style-setter (sym val) + "Custom setter for `c-ts-mode-set-style'. +Apart from setting the default value of SYM to VAL, also change +the value of SYM in `c-ts-mode' and `c++-ts-mode' buffers to VAL." + (set-default sym val) + (named-let loop ((res nil) + (buffers (buffer-list))) + (if (null buffers) + (mapc (lambda (b) + (with-current-buffer b + (setq-local treesit-simple-indent-rules + (treesit--indent-rules-optimize + (c-ts-mode--get-indent-style + (if (eq major-mode 'c-ts-mode) 'c 'cpp)))))) + res) + (let ((buffer (car buffers))) + (with-current-buffer buffer + ;; FIXME: Should we use `derived-mode-p' here? + (if (or (eq major-mode 'c-ts-mode) (eq major-mode 'c++-ts-mode)) + (loop (append res (list buffer)) (cdr buffers)) + (loop res (cdr buffers)))))))) + (defcustom c-ts-mode-indent-style 'gnu "Style used for indentation. @@ -100,13 +117,42 @@ one of the supplied styles doesn't suffice a function could be set instead. This function is expected return a list that follows the form of `treesit-simple-indent-rules'." 
:version "29.1" - :type '(choice (symbol :tag "Gnu" 'gnu) - (symbol :tag "K&R" 'k&r) - (symbol :tag "Linux" 'linux) - (symbol :tag "BSD" 'bsd) + :type '(choice (symbol :tag "Gnu" gnu) + (symbol :tag "K&R" k&r) + (symbol :tag "Linux" linux) + (symbol :tag "BSD" bsd) (function :tag "A function for user customized style" ignore)) + :set #'c-ts-mode--indent-style-setter :group 'c) +(defun c-ts-mode--get-indent-style (mode) + "Helper function to set indentation style. +MODE is either `c' or `cpp'." + (let ((style + (if (functionp c-ts-mode-indent-style) + (funcall c-ts-mode-indent-style) + (alist-get c-ts-mode-indent-style (c-ts-mode--indent-styles mode))))) + `((,mode ,@style)))) + +(defun c-ts-mode-set-style () + "Set the indent style of C/C++ modes globally. + +This changes the current indent style of every C/C++ buffer and +the default C/C++ indent style in this Emacs session." + (interactive) + ;; FIXME: Should we use `derived-mode-p' here? + (or (eq major-mode 'c-ts-mode) (eq major-mode 'c++-ts-mode) + (error "Buffer %s is not a c-ts-mode (c-ts-mode-set-style)" + (buffer-name))) + (c-ts-mode--indent-style-setter + 'c-ts-mode-indent-style + ;; NOTE: We can probably use the interactive form for this. + (intern + (completing-read + "Select style: " + (mapcar #'car (c-ts-mode--indent-styles (if (eq major-mode 'c-ts-mode) 'c 'cpp))) + nil t nil nil "gnu")))) + ;;; Syntax table (defvar c-ts-mode--syntax-table @@ -177,7 +223,7 @@ MODE is either `c' or `cpp'." ;; Labels. ((node-is "labeled_statement") parent-bol 0) ((parent-is "labeled_statement") - point-min c-ts-mode--statement-offset) + point-min c-ts-common-statement-offset) ((match "preproc_ifdef" "compound_statement") point-min 0) ((match "#endif" "preproc_ifdef") point-min 0) @@ -186,15 +232,6 @@ MODE is either `c' or `cpp'." ((match "preproc_function_def" "compound_statement") point-min 0) ((match "preproc_call" "compound_statement") point-min 0) - ;; {} blocks. 
- ((node-is "}") point-min c-ts-mode--close-bracket-offset) - ((parent-is "compound_statement") - point-min c-ts-mode--statement-offset) - ((parent-is "enumerator_list") - point-min c-ts-mode--statement-offset) - ((parent-is "field_declaration_list") - point-min c-ts-mode--statement-offset) - ((parent-is "function_definition") parent-bol 0) ((parent-is "conditional_expression") first-sibling 0) ((parent-is "assignment_expression") parent-bol c-ts-mode-indent-offset) @@ -215,13 +252,16 @@ MODE is either `c' or `cpp'." ;; Indent the body of namespace definitions. ((parent-is "declaration_list") parent-bol c-ts-mode-indent-offset))) + ;; int[5] a = { 0, 0, 0, 0 }; ((parent-is "initializer_list") parent-bol c-ts-mode-indent-offset) - ((parent-is "if_statement") parent-bol c-ts-mode-indent-offset) - ((parent-is "for_statement") parent-bol c-ts-mode-indent-offset) - ((parent-is "while_statement") parent-bol c-ts-mode-indent-offset) - ((parent-is "switch_statement") parent-bol c-ts-mode-indent-offset) - ((parent-is "case_statement") parent-bol c-ts-mode-indent-offset) - ((parent-is "do_statement") parent-bol c-ts-mode-indent-offset) + ((parent-is "enumerator_list") point-min c-ts-common-statement-offset) + ((parent-is "field_declaration_list") point-min c-ts-common-statement-offset) + + ;; {} blocks. + ((node-is "}") point-min c-ts-mode--close-bracket-offset) + ((parent-is "compound_statement") point-min c-ts-common-statement-offset) + ((node-is "compound_statement") point-min c-ts-common-statement-offset) + ,@(when (eq mode 'cpp) `(((node-is "field_initializer_list") parent-bol ,(* c-ts-mode-indent-offset 2))))))) `((gnu @@ -249,19 +289,6 @@ MODE is either `c' or `cpp'." ((parent-is "do_statement") parent-bol 0) ,@common)))) -(defun c-ts-mode--set-indent-style (mode) - "Helper function to set indentation style. -MODE is either `c' or `cpp'." 
- (let ((style - (if (functionp c-ts-mode-indent-style) - (funcall c-ts-mode-indent-style) - (pcase c-ts-mode-indent-style - ('gnu (alist-get 'gnu (c-ts-mode--indent-styles mode))) - ('k&r (alist-get 'k&r (c-ts-mode--indent-styles mode))) - ('bsd (alist-get 'bsd (c-ts-mode--indent-styles mode))) - ('linux (alist-get 'linux (c-ts-mode--indent-styles mode))))))) - `((,mode ,@style)))) - (defun c-ts-mode--top-level-label-matcher (node &rest _) "A matcher that matches a top-level label. NODE should be a labeled_statement." @@ -273,90 +300,6 @@ NODE should be a labeled_statement." "labeled_statement") (not (treesit-node-top-level func "compound_statement"))))) -(defvar c-ts-mode-indent-block-type-regexp - (rx (or "compound_statement" - "field_declaration_list" - "enumerator_list")) - "Regexp matching types of block nodes (i.e., {} blocks).") - -(defvar c-ts-mode--statement-offset-post-processr nil - "A functions that makes adjustments to `c-ts-mode--statement-offset'. - -This is a function that takes two arguments, the current indent -level and the current node, and returns a new level. - -When `c-ts-mode--statement-offset' runs and go up the parse tree, -it increments the indent level when some condition are met in -each level. At each level, after (possibly) incrementing the -offset, it calls this function, passing it the current indent -level and the current node, and use the return value as the new -indent level.") - -(defun c-ts-mode--statement-offset (node parent &rest _) - "This anchor is used for children of a statement inside a block. - -This function basically counts the number of block nodes (defined -by `c-ts-mode--indent-block-type-regexp') between NODE and the -root node (not counting NODE itself), and multiply that by -`c-ts-mode-indent-offset'. - -To support GNU style, on each block level, this function also -checks whether the opening bracket { is on its own line, if so, -it adds an extra level, except for the top-level. - -PARENT is NODE's parent." 
- (let ((level 0)) - ;; If point is on an empty line, NODE would be nil, but we pretend - ;; there is a statement node. - (when (null node) - (setq node t)) - (while (if (eq node t) - (setq node parent) - (setq node (treesit-node-parent node))) - (when (string-match-p c-ts-mode-indent-block-type-regexp - (treesit-node-type node)) - (cl-incf level) - (save-excursion - (goto-char (treesit-node-start node)) - ;; Add an extra level if the opening bracket is on its own - ;; line, except (1) it's at top-level, or (2) it's immediate - ;; parent is another block. - (cond ((bolp) nil) ; Case (1). - ((let ((parent-type (treesit-node-type - (treesit-node-parent node)))) - ;; Case (2). - (and parent-type - (string-match-p c-ts-mode-indent-block-type-regexp - parent-type))) - nil) - ;; Add a level. - ((looking-back (rx bol (* whitespace)) - (line-beginning-position)) - (cl-incf level))))) - (when c-ts-mode--statement-offset-post-processr - (setq level (funcall c-ts-mode--statement-offset-post-processr - level node)))) - (* level c-ts-mode-indent-offset))) - -(defun c-ts-mode--fix-bracketless-indent (level node) - "Takes LEVEL and NODE and returns adjusted LEVEL. -This fixes indentation for cases shown in bug#61026. Basically -in C/C++, constructs like if, for, while sometimes don't have -bracket." - (if (and (not (equal (treesit-node-type node) "compound_statement")) - (member (treesit-node-type (treesit-node-parent node)) - '("if_statement" "while_statement" "do_statement" - "for_statement"))) - (1+ level) - level)) - -(defun c-ts-mode--close-bracket-offset (node parent &rest _) - "Offset for the closing bracket, NODE. -It's basically one level less that the statements in the block. -PARENT is NODE's parent." - (- (c-ts-mode--statement-offset node parent) - c-ts-mode-indent-offset)) - ;;; Font-lock (defvar c-ts-mode--preproc-keywords @@ -757,7 +700,8 @@ the semicolon. This function skips the semicolon." 
(defvar-keymap c-ts-mode-map :doc "Keymap for the C language with tree-sitter" :parent prog-mode-map - "C-c C-q" #'c-ts-mode-indent-defun) + "C-c C-q" #'c-ts-mode-indent-defun + "C-c ." #'c-ts-mode-set-style) ;;;###autoload (define-derived-mode c-ts-base-mode prog-mode "C" @@ -817,8 +761,14 @@ the semicolon. This function skips the semicolon." ;; Indent. (when (eq c-ts-mode-indent-style 'linux) (setq-local indent-tabs-mode t)) - (setq-local c-ts-mode--statement-offset-post-processr - #'c-ts-mode--fix-bracketless-indent) + (setq-local c-ts-common-indent-offset 'c-ts-mode-indent-offset) + (setq-local c-ts-common-indent-block-type-regexp + (rx (or "compound_statement" + "field_declaration_list" + "enumerator_list"))) + (setq-local c-ts-common-indent-bracketless-type-regexp + (rx (or "if_statement" "do_statement" + "for_statement" "while_statement"))) ;; Comment (c-ts-common-comment-setup) @@ -871,7 +821,7 @@ in your configuration." (setq-local comment-end " */") ;; Indent. (setq-local treesit-simple-indent-rules - (c-ts-mode--set-indent-style 'c)) + (c-ts-mode--get-indent-style 'c)) ;; Font-lock. (setq-local treesit-font-lock-settings (c-ts-mode--font-lock-settings 'c)) (treesit-major-mode-setup))) @@ -907,7 +857,7 @@ in your configuration." ;; Indent. (setq-local treesit-simple-indent-rules - (c-ts-mode--set-indent-style 'cpp)) + (c-ts-mode--get-indent-style 'cpp)) ;; Font-lock. 
(setq-local treesit-font-lock-settings (c-ts-mode--font-lock-settings 'cpp)) diff --git a/lisp/progmodes/dockerfile-ts-mode.el b/lisp/progmodes/dockerfile-ts-mode.el index 23ac48a6117..c9125bc6cbd 100644 --- a/lisp/progmodes/dockerfile-ts-mode.el +++ b/lisp/progmodes/dockerfile-ts-mode.el @@ -51,9 +51,27 @@ ((parent-is "expose_instruction") (nth-sibling 1) 0) ((parent-is "label_instruction") (nth-sibling 1) 0) ((parent-is "shell_command") first-sibling 0) - ((parent-is "string_array") first-sibling 1))) + ((parent-is "string_array") first-sibling 1) + ((dockerfile-ts-mode--line-continuation-p) dockerfile-ts-mode--line-continuation-anchor 0))) "Tree-sitter indent rules.") +(defun dockerfile-ts-mode--line-continuation-p () + "Return t if the current node is a line continuation node." + (lambda (node _ _ &rest _) + (string= (treesit-node-type node) "\n"))) + +(defun dockerfile-ts-mode--line-continuation-anchor (_ _ &rest _) + "This anchor is used to align any nodes that are part of a line +continuation to the previous entry." 
+ (save-excursion + (forward-line -1) + (let ((prev-node (treesit-node-at (point)))) + (if (string= (treesit-node-type prev-node) "\\\n") + (back-to-indentation) + (forward-word) + (forward-char)) + (+ 1 (- (point) (pos-bol)))))) + (defvar dockerfile-ts-mode--keywords '("ADD" "ARG" "AS" "CMD" "COPY" "CROSS_BUILD" "ENTRYPOINT" "ENV" "EXPOSE" "FROM" "HEALTHCHECK" "LABEL" "MAINTAINER" "ONBUILD" "RUN" diff --git a/lisp/progmodes/java-ts-mode.el b/lisp/progmodes/java-ts-mode.el index dbd63698770..e4153725efd 100644 --- a/lisp/progmodes/java-ts-mode.el +++ b/lisp/progmodes/java-ts-mode.el @@ -89,6 +89,7 @@ ((query "(method_declaration (block (_) @indent))") parent-bol java-ts-mode-indent-offset) ((parent-is "local_variable_declaration") parent-bol java-ts-mode-indent-offset) ((parent-is "expression_statement") parent-bol java-ts-mode-indent-offset) + ((match "type_identifier" "field_declaration") parent-bol 0) ((parent-is "field_declaration") parent-bol java-ts-mode-indent-offset) ((parent-is "return_statement") parent-bol java-ts-mode-indent-offset) ((parent-is "variable_declarator") parent-bol java-ts-mode-indent-offset) diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el index a869cdc5fdb..df0d1c96965 100644 --- a/lisp/progmodes/python.el +++ b/lisp/progmodes/python.el @@ -6715,8 +6715,8 @@ implementations: `python-mode' and `python-ts-mode'." (when python-indent-guess-indent-offset (python-indent-guess-indent-offset)) - (add-to-list 'auto-mode-alist - '("\\.py[iw]?\\'\\|python[0-9.]*" . python-ts-mode)))) + (add-to-list 'auto-mode-alist '("\\.py[iw]?\\'" . python-ts-mode)) + (add-to-list 'interpreter-mode-alist '("python[0-9.]*" . 
python-ts-mode)))) ;;; Completion predicates for M-x ;; Commands that only make sense when editing Python code diff --git a/lisp/progmodes/rust-ts-mode.el b/lisp/progmodes/rust-ts-mode.el index 3a6cb61b719..2812e39c101 100644 --- a/lisp/progmodes/rust-ts-mode.el +++ b/lisp/progmodes/rust-ts-mode.el @@ -275,6 +275,28 @@ Return nil if there is no name or if NODE is not a defun node." (treesit-node-text (treesit-node-child-by-field-name node "name") t)))) +(defun rust-ts-mode--syntax-propertize (beg end) + "Apply syntax text property to template delimiters between BEG and END. + +< and > are usually punctuation, e.g., as greater/less-than. But +when used for types, they should be considered pairs. + +This function checks for < and > in the changed RANGES and apply +appropriate text property to alter the syntax of template +delimiters < and >'s." + (goto-char beg) + (while (re-search-forward (rx (or "<" ">")) end t) + (pcase (treesit-node-type + (treesit-node-parent + (treesit-node-at (match-beginning 0)))) + ("type_arguments" + (put-text-property (match-beginning 0) + (match-end 0) + 'syntax-table + (pcase (char-before) + (?< '(4 . ?>)) + (?> '(5 . ?<)))))))) + ;;;###autoload (define-derived-mode rust-ts-mode prog-mode "Rust" "Major mode for editing Rust, powered by tree-sitter." @@ -284,6 +306,10 @@ Return nil if there is no name or if NODE is not a defun node." (when (treesit-ready-p 'rust) (treesit-parser-create 'rust) + ;; Syntax. + (setq-local syntax-propertize-function + #'rust-ts-mode--syntax-propertize) + ;; Comments. (c-ts-common-comment-setup) diff --git a/lisp/treesit.el b/lisp/treesit.el index 29b29d2665a..c9f2e339dc2 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -554,7 +554,25 @@ omitted, default END to BEG." "Generic tree-sitter font-lock error" 'treesit-error) -(defvar-local treesit-font-lock-level 3 +(defun treesit--font-lock-level-setter (sym val) + "Custom setter for `treesit-font-lock-level'." 
+ (set-default sym val) + (named-let loop ((res nil) + (buffers (buffer-list))) + (if (null buffers) + (mapc (lambda (b) + (with-current-buffer b + (setq-local treesit-font-lock-level val) + (treesit-font-lock-recompute-features) + (treesit-font-lock-fontify-region (point-min) (point-max)))) + res) + (let ((buffer (car buffers))) + (with-current-buffer buffer + (if (treesit-parser-list) + (loop (append res (list buffer)) (cdr buffers)) + (loop res (cdr buffers)))))))) + +(defcustom treesit-font-lock-level 3 "Decoration level to be used by tree-sitter fontifications. Major modes categorize their fontification features into levels, @@ -562,16 +580,24 @@ from 1 which is the absolute minimum, to 4 that yields the maximum fontifications. Level 1 usually contains only comments and definitions. -Level 2 usually adds keywords, strings, constants, types, etc. -Level 3 usually represents a full-blown fontification, including -assignment, constants, numbers, properties, etc. +Level 2 usually adds keywords, strings, data types, etc. +Level 3 usually represents full-blown fontifications, including +assignments, constants, numbers and literals, properties, etc. Level 4 adds everything else that can be fontified: delimiters, -operators, brackets, all functions and variables, etc. +operators, brackets, punctuation, all functions and variables, etc. In addition to the decoration level, individual features can be turned on/off by calling `treesit-font-lock-recompute-features'. Changing the decoration level requires calling -`treesit-font-lock-recompute-features' to have an effect.") +`treesit-font-lock-recompute-features' to have an effect, unless +done via `customize-variable'. + +To see which syntactical categories are fontified by each level +in a particular major mode, examine the buffer-local value of the +variable `treesit-font-lock-feature-list'." 
+ :type 'integer + :set #'treesit--font-lock-level-setter + :version "29.1") (defvar-local treesit--font-lock-query-expand-range (cons 0 0) "The amount to expand the start and end of the region when fontifying. @@ -892,27 +918,20 @@ LIMIT is the recursion limit, which defaults to 100." (push r result)) (push child result)) (setq child (treesit-node-next-sibling child))) - ;; If NODE has no child, keep NODE. - (or result (list node)))) + ;; If NODE has no child, keep NODE. If LIMIT is exceeded, return + ;; nil. + (or result (and (> limit 0) (list node))))) (defsubst treesit--node-length (node) "Return the length of the text of NODE." (- (treesit-node-end node) (treesit-node-start node))) -(defvar-local treesit--font-lock-fast-mode nil +(defvar-local treesit--font-lock-fast-mode 'unspecified "If this variable is t, change the way we query so it's faster. This is not a general optimization and should be RARELY needed! See comments in `treesit-font-lock-fontify-region' for more detail.") -(defvar-local treesit--font-lock-fast-mode-grace-count 5 - "Grace counts before we turn on the fast mode. - -When query takes abnormally long time to execute, we turn on the -\"fast mode\", but just to be on the safe side, we only turn on -the fast mode after this number of offenses. See bug#60691, -bug#60223.") - ;; Some details worth explaining: ;; ;; 1. When we apply face to a node, we clip the face into the @@ -964,36 +983,34 @@ If LOUDLY is non-nil, display some debugging information." (enable (nth 1 setting)) (override (nth 3 setting)) (language (treesit-query-language query))) - (when-let ((nodes (list (treesit-buffer-root-node language))) - ;; Only activate if ENABLE flag is t. - (activate (eq t enable))) - (ignore activate) - ;; If we run into problematic files, use the "fast mode" to - ;; try to recover. See comment #2 above for more explanation. 
- (when treesit--font-lock-fast-mode - (setq nodes (treesit--children-covering-range-recurse - (car nodes) start end (* 4 jit-lock-chunk-size)))) + ;; Use deterministic way to decide whether to turn on "fast + ;; mode". (See bug#60691, bug#60223.) + (when (eq treesit--font-lock-fast-mode 'unspecified) + (pcase-let ((`(,max-depth ,max-width) + (treesit-subtree-stat + (treesit-buffer-root-node language)))) + (if (or (> max-depth 100) (> max-width 4000)) + (setq treesit--font-lock-fast-mode t) + (setq treesit--font-lock-fast-mode nil)))) + + (when-let* ((root (treesit-buffer-root-node language)) + (nodes (if (eq t treesit--font-lock-fast-mode) + (treesit--children-covering-range-recurse + root start end (* 4 jit-lock-chunk-size)) + (list (treesit-buffer-root-node language)))) + ;; Only activate if ENABLE flag is t. + (activate (eq t enable))) + (ignore activate) ;; Query each node. (dolist (sub-node nodes) (let* ((delta-start (car treesit--font-lock-query-expand-range)) (delta-end (cdr treesit--font-lock-query-expand-range)) - (start-time (current-time)) (captures (treesit-query-capture sub-node query (max (- start delta-start) (point-min)) - (min (+ end delta-end) (point-max)))) - (end-time (current-time))) - ;; If for any query the query time is strangely long, - ;; switch to fast mode (see comments above). - (when (and (null treesit--font-lock-fast-mode) - (> (time-to-seconds - (time-subtract end-time start-time)) - 0.01)) - (if (> treesit--font-lock-fast-mode-grace-count 0) - (cl-decf treesit--font-lock-fast-mode-grace-count) - (setq-local treesit--font-lock-fast-mode t))) + (min (+ end delta-end) (point-max))))) ;; For each captured node, fontify that node. (with-silent-modifications @@ -1002,12 +1019,14 @@ If LOUDLY is non-nil, display some debugging information." (node (cdr capture)) (node-start (treesit-node-start node)) (node-end (treesit-node-end node))) + ;; If node is not in the region, take them out. See ;; comment #3 above for more detail. 
(if (and (facep face) (or (>= start node-end) (>= node-start end))) (when (or loudly treesit--font-lock-verbose) (message "Captured node %s(%s-%s) but it is outside of fontifing region" node node-start node-end)) + (cond ((facep face) (treesit-fontify-with-override @@ -1015,6 +1034,7 @@ If LOUDLY is non-nil, display some debugging information." face override)) ((functionp face) (funcall face node override start end))) + ;; Don't raise an error if FACE is neither a face nor ;; a function. This is to allow intermediate capture ;; names used for #match and #eq. @@ -3033,10 +3053,10 @@ function signals an error." :no-value (treesit-parser-set-included-ranges parser '((1 . 4) (5 . 8)))) (treesit-parser-included-ranges :no-eval (treesit-parser-included-ranges parser) - :eg-result '((1 . 4) (5 . 8))) + :eg-result ((1 . 4) (5 . 8))) (treesit-query-range :no-eval (treesit-query-range node '((script_element) @cap)) - :eg-result-string '((1 . 4) (5 . 8))) + :eg-result ((1 . 4) (5 . 8))) "Retrieving a node" @@ -3182,7 +3202,12 @@ function signals an error." :eg-result-string "#<treesit-node (translation_unit) in 1-11>") (treesit-query-string :no-eval (treesit-query-string "int c = 0;" '((identifier) @id) 'c) - :eg-result-string "((id . #<treesit-node (identifier) in 5-6>))")) + :eg-result-string "((id . #<treesit-node (identifier) in 5-6>))") + + "Misc" + (treesit-subtree-stat + :no-eval (treesit-subtree-stat node) + :eg-result (6 33 487))) (provide 'treesit) diff --git a/src/comp.c b/src/comp.c index 6ff1915ef5b..ba549155925 100644 --- a/src/comp.c +++ b/src/comp.c @@ -5912,6 +5912,3 @@ file -> CU. */); defsubr (&Snative_comp_available_p); } -/* Local Variables: */ -/* c-file-offsets: ((arglist-intro . +)) */ -/* End: */ diff --git a/src/treesit.c b/src/treesit.c index 917db582676..b210ec0923a 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -3312,6 +3312,68 @@ a regexp. 
*/) return parent; } +DEFUN ("treesit-subtree-stat", + Ftreesit_subtree_stat, + Streesit_subtree_stat, 1, 1, 0, + doc: /* Return information about the subtree of NODE. + +Return a list (MAX-DEPTH MAX-WIDTH COUNT), where MAX-DEPTH is the +maximum depth of the subtree, MAX-WIDTH is the maximum number of +direct children of nodes in the subtree, and COUNT is the number of +nodes in the subtree, including NODE. */) + (Lisp_Object node) +{ + /* Having a limit on the depth to traverse doesn't have much impact + on the time it takes, so I left that out. */ + CHECK_TS_NODE (node); + + treesit_initialize (); + + TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (node)->node); + ptrdiff_t max_depth = 1; + ptrdiff_t max_width = 0; + ptrdiff_t count = 0; + ptrdiff_t current_depth = 0; + + /* Traverse the subtree depth-first. */ + while (true) + { + count++; + + /* Go down depth-first. */ + while (ts_tree_cursor_goto_first_child (&cursor)) + { + current_depth++; + count++; + /* While we're at here, measure the number of siblings. */ + ptrdiff_t width_count = 1; + while (ts_tree_cursor_goto_next_sibling (&cursor)) + width_count++; + max_width = max (max_width, width_count); + /* Go back to the first sibling. */ + treesit_assume_true (ts_tree_cursor_goto_parent (&cursor)); + treesit_assume_true (ts_tree_cursor_goto_first_child (&cursor)); + } + max_depth = max (max_depth, current_depth); + + /* Go to next sibling. If there is no next sibling, go to + parent's next sibling, and so on. If there is no more + parent, we've traversed the whole subtree, stop. 
*/ + while (!ts_tree_cursor_goto_next_sibling (&cursor)) + { + if (ts_tree_cursor_goto_parent (&cursor)) + current_depth--; + else + { + ts_tree_cursor_delete (&cursor); + return list3 (make_fixnum (max_depth), + make_fixnum (max_width), + make_fixnum (count)); + } + } + } +} + #endif /* HAVE_TREE_SITTER */ DEFUN ("treesit-available-p", Ftreesit_available_p, @@ -3511,6 +3573,7 @@ then in the system default locations for dynamic libraries, in that order. */); defsubr (&Streesit_search_subtree); defsubr (&Streesit_search_forward); defsubr (&Streesit_induce_sparse_tree); + defsubr (&Streesit_subtree_stat); #endif /* HAVE_TREE_SITTER */ defsubr (&Streesit_available_p); } diff --git a/test/lisp/progmodes/c-ts-mode-resources/indent.erts b/test/lisp/progmodes/c-ts-mode-resources/indent.erts index 67654404a77..0ecbf922b15 100644 --- a/test/lisp/progmodes/c-ts-mode-resources/indent.erts +++ b/test/lisp/progmodes/c-ts-mode-resources/indent.erts @@ -114,7 +114,9 @@ int main() { { puts ("Hello"); } - for (int i=0; i<5; i++) + for (int i=0; + i<5; + i++) if (true) { puts ("Hello"); @@ -141,7 +143,9 @@ int main() { if (true) { puts ("Hello"); } - for (int i=0; i<5; i++) + for (int i=0; + i<5; + i++) if (true) { puts ("Hello"); } |