aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean Whitton <spwhitton@spwhitton.name>2023-03-16 12:00:35 -0700
committerSean Whitton <spwhitton@spwhitton.name>2023-03-16 19:05:06 -0700
commit23497607bf7ec831dd57bf06bf6cd802c3ec6b8a (patch)
treecb92d5897434af338d9bd1cceb255ff1bb9fca69
parent8fa41a15f184660ab5bda5f86d645ba9b2582389 (diff)
downloadconsfigurator-23497607bf7ec831dd57bf06bf6cd802c3ec6b8a.tar.gz
new reader macros for shell- and Perl-style matching & replacement
Signed-off-by: Sean Whitton <spwhitton@spwhitton.name>
-rw-r--r--doc/news.rst7
-rw-r--r--doc/reader.rst50
-rw-r--r--emacs/consfigurator.el.in7
-rw-r--r--src/property/cron.lisp2
-rw-r--r--src/property/crypttab.lisp7
-rw-r--r--src/reader.lisp122
-rw-r--r--src/util.lisp12
-rw-r--r--tests/reader.lisp74
8 files changed, 261 insertions, 20 deletions
diff --git a/doc/news.rst b/doc/news.rst
index da40c27..80701d6 100644
--- a/doc/news.rst
+++ b/doc/news.rst
@@ -26,6 +26,9 @@ you should review this document and see if your consfig needs updating.
1.3.0 (unreleased)
------------------
+- New reader macros ``#~m//`` and ``#~s///`` for shell- and Perl-style regular
+ expression matching and replacement.
+
- New reader macro ``#>>EOF>>`` which is like ``#>EOF>`` except that it skips
over the remainder of the current line and its newline. This is more like
how heredocs work in other languages.
@@ -57,8 +60,8 @@ you should review this document and see if your consfig needs updating.
without having to read the source of properties modules.
- New Emacs major mode, ``consfigurator-lisp-mode``. This takes care of
- informing Emacs that parts of the buffer are CL-INTERPOL and CL-HEREDOC
- strings, fixing SLIME's C-c C-c in certain cases.
+ informing Emacs that parts of the buffer are CL-INTERPOL, CL-HEREDOC and our
+ ``#~m//`` and ``#~s///`` strings, fixing SLIME's C-c C-c in certain cases.
1.2.2 (2023-02-20)
------------------
diff --git a/doc/reader.rst b/doc/reader.rst
index e6f9c3a..1d78cd2 100644
--- a/doc/reader.rst
+++ b/doc/reader.rst
@@ -22,6 +22,52 @@ Sharp-question mark is the well-known CL-INTERPOL_ reader macro.
.. _CL-INTERPOL: https://edicl.github.io/cl-interpol/
+``#~m//``: PCRE matching
+------------------------
+
+This provides an abbreviation for shell- and Perl-style regexp matching:
+
+.. code-block:: none
+
+ (#~m/b.+b/i "FooBarBaz") => "BarB"
+ (#~m/b(.+)b/i "FooBarBaz") => #("ar")
+ (mapcar #3~m/(\w+)(\W+)(\w+)/ '("one two" "three four" "five six"))
+ => ("two" "four" "six")
+
+Any delimiters supported by ``CL-INTERPOL`` may be used, and the ``m`` is
+always optional. Trailing options ``g``, ``i``, ``m``, ``s`` and ``x`` are
+meaningful. The return value depends on the numeric argument before the
+tilde:
+
+- ``#~m//``, with no argument, returns a vector of the substrings
+ corresponding to the capture groups, or if there were no capture groups,
+ just the whole matched string.
+
+- ``#0~m//`` returns two values: the whole matched string, and a vector of
+ capture group substrings. (This is plain ``CL-PPCRE:SCAN-TO-STRINGS``.)
+
+- ``#n~m//`` returns two values: the nth capture group's substring, and a
+ vector of all the capture group substrings.
+
+``#!~m//``: PCRE negative matching
+----------------------------------
+
+Equivalent to ``(not #~m//)``.
+
+``#~s///``: PCRE substitution
+-----------------------------
+
+This provides an abbreviation for shell- and Perl-style regexp substitution:
+
+.. code-block:: none
+
+ (#~s/foo/bar/ "foobarbaz") => "foofoobaz"
+ (mapcar #~s/:.+:/`\&`/ '(":Hello:" ":Goodbye:")) => ("`:Hello:`" "`:Goodbye:`")
+
+Again, any delimiters supported by ``CL-INTERPOL`` may be used, and the same
+trailing options are meaningful. This is ``CL-PPCRE:REGEX-REPLACE`` or
+``CL-PPCRE:REGEX-REPLACE-ALL``, which see regarding return values.
+
``#>EOF>`` and ``#>>EOF>>``: Heredocs
-------------------------------------
@@ -55,3 +101,7 @@ See also
- `perlop(1) <https://perldoc.perl.org/perlop>`_
- `inferior-shell <https://cliki.net/inferior-shell>`_
+
+- `Let Over Lambda ch. 4
+ <https://letoverlambda.com/index.cl/guest/chap4.html>`_, which originally
+ inspired ``#~m//`` and ``#~s///``.
diff --git a/emacs/consfigurator.el.in b/emacs/consfigurator.el.in
index d9693f9..ed08766 100644
--- a/emacs/consfigurator.el.in
+++ b/emacs/consfigurator.el.in
@@ -152,12 +152,11 @@ Modes that use this should add `syntax-propertize-multiline' to
(car (string-to-syntax "!")))
ender))
(consfigurator--finish-propertize-qq-heredoc end))))))
- ;; Also recognise Let Over Lambda's #~ and a #!~ negated version.
- ;; We might want to add these to Consfigurator's readtable.
((rx (| (: (group-n 1 "#?" (group-n 2 (opt (| "rx" ?r ?x))))
(group-n 3 (not (in alnum ?\\))))
- (: (group-n 1 ?# (opt ?!) ?~ (group-n 2
- (opt (| "tr" ?m ?s ?y))))
+ (: (group-n 1
+ ?# (0+ digit) (opt ?!) ?~ (group-n 2
+ (opt (| "tr" ?m ?s ?y))))
(group-n 3 nonl))))
(1 (and (not (in-string-or-comment-p))
(eval-when-compile (string-to-syntax "'"))))
diff --git a/src/property/cron.lisp b/src/property/cron.lisp
index bba953f..e68b6ca 100644
--- a/src/property/cron.lisp
+++ b/src/property/cron.lisp
@@ -135,7 +135,7 @@ directory."
(old (runlines :may-fail "crontab" "-l"))
(new
(mapcar
- (lambda (line) (re:regex-replace-all #?/\$HOME/ line home))
+ #~s/\$HOME/${home}/g
(nconc
(list "# Automatically updated by Consfigurator; do not edit" "")
(loop for (k v) on env by #'cddr
diff --git a/src/property/crypttab.lisp b/src/property/crypttab.lisp
index cc44f50..6538409 100644
--- a/src/property/crypttab.lisp
+++ b/src/property/crypttab.lisp
@@ -25,10 +25,9 @@
(if (string= val "") nil val)))
(defun get-device-parent (device)
- (multiple-value-bind (match groups)
- (re:scan-to-strings #?/^1\s+dependencies\s*:\s*\((\S+)\)$/
- (run "dmsetup" "deps" "-o" "blkdevname" device))
- (and match (merge-pathnames (elt groups 0) #P"/dev/"))))
+ (aand (#1~/^1\s+dependencies\s*:\s*\((\S+)\)$/
+ (run "dmsetup" "deps" "-o" "blkdevname" device))
+ (merge-pathnames it #P"/dev/")))
(defmethod ct-target ((volume opened-luks-container))
(volume-label volume))
diff --git a/src/reader.lisp b/src/reader.lisp
index f744fa4..3d73046 100644
--- a/src/reader.lisp
+++ b/src/reader.lisp
@@ -76,10 +76,130 @@ indented heredoc; see perlop(1)."
finally (return (unlines accum)))
heredoc))))
+;; Originally inspired by some ideas of Doug Hoyt in his *Let Over Lambda*.
+(defun perl-tilde-reader (stream char arg)
+ (flet ((readerr (&rest args)
+ (apply #'simple-reader-error stream args)))
+ (let ((negated (and (char= #\! char)
+ (or (char= #\~ (read-char stream t :eof t))
+ (readerr "Expected \"~~\" following \"!\"."))))
+ first-delim op)
+ (loop for next = (read-char stream t :eof t)
+ while (alpha-char-p next) collect next into accum
+ finally (setq first-delim next
+ op (cond ((null accum) #\m)
+ ((= (length accum) 1) (car accum))
+ ((string= (coerce accum 'string) "tr") #\y)
+ (t (readerr "Unknown regexp operator ~S."
+ (coerce accum 'string))))))
+ (let* ((cl-interpol:*inner-delimiters*
+ (and (not (eql first-delim #\'))
+ ;; This is the same as CL-INTERPOL's regexp mode, but we
+ ;; don't turn that on for SECOND-ARG.
+ '((#\{ . #\}))))
+ (first-arg
+ (cl-interpol:interpol-reader
+ (make-concatenated-stream
+ (make-string-input-stream (format nil "r~A" first-delim))
+ stream)
+ #\? nil))
+ (second-delim
+ (and (member op '(#\s #\y))
+ (if (atom (find first-delim cl-interpol:*outer-delimiters*
+ :key #'ensure-car))
+ first-delim
+ (read-char stream t :eof t))))
+ (second-closer
+ (aand (find second-delim cl-interpol:*outer-delimiters*
+ :key #'ensure-car)
+ (consp it) (cdr it)))
+ (second-arg
+ ;; Here we have to do our own pass before calling
+ ;; CL-INTERPOL:INTERPOL-READER because we need to additionally
+ ;; preserve \`, \& and \', and its regexp mode doesn't do that.
+ ;; We do want to use CL-INTERPOL:INTERPOL-READER too, because
+ ;; interpolating into replacement strings is useful.
+ (and second-delim
+ (loop
+ with escaped and depth = 0
+ and result = (make-array 1 :fill-pointer 1
+ :element-type 'character
+ ;; Use our own delimiter to
+ ;; ensure no regexp mode.
+ :initial-element #\#)
+ for next = (read-char stream t :eof t)
+ for extra-escape = (if escaped
+ (or (digit-char-p next)
+ (member next '(#\` #\& #\')))
+ (eql next #\#))
+ if escaped do (setq escaped nil)
+ else do (switch (next)
+ (second-delim (if second-closer
+ (incf depth)
+ (loop-finish)))
+ (second-closer (if (zerop depth)
+ (loop-finish)
+ (decf depth)))
+ (#\\ (setq escaped t)))
+ when extra-escape
+ do (vector-push-extend #\\ result)
+ do (vector-push-extend next result)
+ finally (vector-push-extend #\# result)
+ (return (cl-interpol:interpol-reader
+ (make-string-input-stream result)
+ #\? nil)))))
+ (modes (loop for next = (read-char stream t :eof t)
+ while (alpha-char-p next) collect next
+ finally (unread-char next stream)))
+ (scanner-args
+ (list first-arg
+ :case-insensitive-mode (and (find #\i modes) t)
+ :multi-line-mode (and (find #\m modes) t)
+ :single-line-mode (and (find #\s modes) t)
+ ;; We're choosing not to use CL-INTERPOL's own extended
+ ;; mode because CL-PPCRE's is closer to Perl's.
+ :extended-mode (and (find #\x modes) t)))
+ (scanner (if (constantp first-arg)
+ `(load-time-value
+ (re:create-scanner ,@scanner-args))
+ `(re:create-scanner ,@scanner-args)))
+ (body
+ (ecase op
+ (#\m
+ (cond ((member #\g modes)
+ `(re:all-matches-as-strings ,scanner target-string))
+ ((not arg)
+ ;; The number of capture groups is constant if
+ ;; FIRST-ARG is constant, so could we self-replace
+ ;; on first execution / otherwise memoise?
+ `(multiple-value-bind (zeroth rest)
+ (re:scan-to-strings ,scanner target-string)
+ ;; We could (coerce rest 'list) for use with
+ ;; DESTRUCTURING-BIND. But there is already
+ ;; CL-PPCRE:REGISTER-GROUPS-BIND.
+ (if (zerop (length rest)) zeroth rest)))
+ ((zerop arg)
+ `(re:scan-to-strings ,scanner target-string))
+ (t ; ARG is a positive integer
+ `(aand
+ (nth-value 1 (re:scan-to-strings ,scanner
+ target-string))
+ (values (aref it ,(1- arg)) it)))))
+ (#\s `(,(if (member #\g modes)
+ 're:regex-replace-all
+ 're:regex-replace)
+ ,scanner target-string ,second-arg))
+ (#\y (readerr "Transliteration unimplemented.")))))
+ `(lambda (target-string)
+ ,(if negated `(not ,body) body))))))
+
(named-readtables:defreadtable :consfigurator
(:merge :standard)
(:dispatch-macro-char #\# #\? #'cl-interpol:interpol-reader)
- (:dispatch-macro-char #\# #\> #'read-heredoc))
+ (:dispatch-macro-char #\# #\> #'read-heredoc)
+
+ (:dispatch-macro-char #\# #\~ #'perl-tilde-reader)
+ (:dispatch-macro-char #\# #\! #'perl-tilde-reader))
(named-readtables:defreadtable :consfigurator.without-read-eval
(:merge :consfigurator)
diff --git a/src/util.lisp b/src/util.lisp
index 20a3714..fea974d 100644
--- a/src/util.lisp
+++ b/src/util.lisp
@@ -249,9 +249,7 @@ simple collections of readably-printable values."
(defun parse-username-from-id (output)
"Where OUTPUT is the output of the id(1) command, extract the username."
- (multiple-value-bind (match groups)
- (re:scan-to-strings "^uid=[0-9]+\\(([^)]+)" output)
- (and match (elt groups 0))))
+ (#1~/^uid=[0-9]+\(([^)]+)/ output))
(defun abbreviate-consfigurator-package (name)
(with-standard-io-syntax
@@ -421,11 +419,9 @@ on simple scripts embedded in source code, written with newlines for the sake
of maintainability. Converting those scripts to single lines before they are
executed improves Consfigurator's debug output, and also makes process names
visible to remote commands like ps(1) more readable."
- (re:regex-replace-all
- #?/\s+/ (re:regex-replace-all "(then|else|elif|fi|case|in|;;|do|done);"
- (format nil "~{~A~^; ~}" (lines script))
- "\\1")
- " "))
+ (#~s/\s+/ /g
+ (#~s/(then|else|elif|fi|case|in|;;|do|done);/\1/g
+ (format nil "~{~A~^; ~}" (lines script)))))
;;;; Progress & debug printing
diff --git a/tests/reader.lisp b/tests/reader.lisp
index a56c6c0..1383196 100644
--- a/tests/reader.lisp
+++ b/tests/reader.lisp
@@ -34,3 +34,77 @@ blah
"line 1
line 2
")
+
+(deftest perl-tilde-reader.1
+ (#~/bar/ "foo bar ")
+ "bar")
+
+(deftest perl-tilde-reader.2
+ (#~/(f.*) (bar)/ "foo bar ")
+ #("foo" "bar"))
+
+(deftest perl-tilde-reader.3
+ (#0~/(f.*) (bar)/ "foo bar ")
+ "foo bar" #("foo" "bar"))
+
+(deftest perl-tilde-reader.4
+ (#2~/(f.*) (bar)/ "foo bar ")
+ "bar" #("foo" "bar"))
+
+(deftest perl-tilde-reader.5
+ (#!~/bar/ "foo")
+ t)
+
+(deftest perl-tilde-reader.6
+ (handler-case (read-from-string "(#!/bar/ \"foo\")")
+ (simple-reader-error (err)
+ (format nil (simple-condition-format-control err))))
+ "Expected \"~\" following \"!\".")
+
+(deftest perl-tilde-reader.7
+ (#~/\w{2}/g "aa bb cc")
+ ("aa" "bb" "cc"))
+
+(deftest perl-tilde-reader.8
+ (mapcar #~s/foo/bar/ '("foo" "bar"))
+ ("bar" "bar"))
+
+(deftest perl-tilde-reader.9
+ (#~s/${(+ 1 1)}/${(+ 2 2)}/ "2")
+ "4" t)
+
+(deftest perl-tilde-reader.10
+ (#~s/\w/\w/ "a")
+ "w" t)
+
+(deftest perl-tilde-reader.11
+ (#~s/foo/bar/ "foo foo foo")
+ "bar foo foo" t)
+
+(deftest perl-tilde-reader.12
+ (#~s/foo/bar/g "foo foo foo")
+ "bar bar bar" t)
+
+(deftest perl-tilde-reader.13
+ (#~s/ \s\w d \w\s /!/ix "aDa bDa cDa")
+ "aDa!cDa" t)
+
+(deftest perl-tilde-reader.14
+ (#~s[^(\d) ]{`\1` } "4 foo")
+ "`4` foo" t)
+
+(deftest perl-tilde-reader.15
+ (#~s(\d)((\&\)\()) " 4 ")
+ " (4)() " t)
+
+(deftest perl-tilde-reader.16
+ (#~s/foo/#bar#/ "foo")
+ "#bar#" t)
+
+(deftest perl-tilde-reader.17
+ (#~s#foo#\#bar\## "foo")
+ "#bar#" t)
+
+(deftest perl-tilde-reader.18
+ (#~s'foo'${bar}' "foo")
+ "${bar}" t)