aboutsummaryrefslogtreecommitdiff
path: root/src/reader.lisp
diff options
context:
space:
mode:
authorSean Whitton <spwhitton@spwhitton.name>2023-03-16 12:00:35 -0700
committerSean Whitton <spwhitton@spwhitton.name>2023-03-16 19:05:06 -0700
commit23497607bf7ec831dd57bf06bf6cd802c3ec6b8a (patch)
treecb92d5897434af338d9bd1cceb255ff1bb9fca69 /src/reader.lisp
parent8fa41a15f184660ab5bda5f86d645ba9b2582389 (diff)
downloadconsfigurator-23497607bf7ec831dd57bf06bf6cd802c3ec6b8a.tar.gz
new reader macros for shell- and Perl-style matching & replacement
Signed-off-by: Sean Whitton <spwhitton@spwhitton.name>
Diffstat (limited to 'src/reader.lisp')
-rw-r--r--src/reader.lisp122
1 files changed, 121 insertions, 1 deletions
diff --git a/src/reader.lisp b/src/reader.lisp
index f744fa4..3d73046 100644
--- a/src/reader.lisp
+++ b/src/reader.lisp
@@ -76,10 +76,130 @@ indented heredoc; see perlop(1)."
finally (return (unlines accum)))
heredoc))))
+;; Originally inspired by some ideas of Doug Hoyt in his *Let Over Lambda*.
+(defun perl-tilde-reader (stream char arg)
+ (flet ((readerr (&rest args)
+ (apply #'simple-reader-error stream args)))
+ (let ((negated (and (char= #\! char)
+ (or (char= #\~ (read-char stream t :eof t))
+ (readerr "Expected \"~~\" following \"!\"."))))
+ first-delim op)
+ (loop for next = (read-char stream t :eof t)
+ while (alpha-char-p next) collect next into accum
+ finally (setq first-delim next
+ op (cond ((null accum) #\m)
+ ((= (length accum) 1) (car accum))
+ ((string= (coerce accum 'string) "tr") #\y)
+ (t (readerr "Unknown regexp operator ~S."
+ (coerce accum 'string))))))
+ (let* ((cl-interpol:*inner-delimiters*
+ (and (not (eql first-delim #\'))
+ ;; This is the same as CL-INTERPOL's regexp mode, but we
+ ;; don't turn that on for SECOND-ARG.
+ '((#\{ . #\}))))
+ (first-arg
+ (cl-interpol:interpol-reader
+ (make-concatenated-stream
+ (make-string-input-stream (format nil "r~A" first-delim))
+ stream)
+ #\? nil))
+ (second-delim
+ (and (member op '(#\s #\y))
+ (if (atom (find first-delim cl-interpol:*outer-delimiters*
+ :key #'ensure-car))
+ first-delim
+ (read-char stream t :eof t))))
+ (second-closer
+ (aand (find second-delim cl-interpol:*outer-delimiters*
+ :key #'ensure-car)
+ (consp it) (cdr it)))
+ (second-arg
+ ;; Here we have to do our own pass before calling
+ ;; CL-INTERPOL:INTERPOL-READER because we need to additionally
+ ;; preserve \`, \& and \', and its regexp mode doesn't do that.
+ ;; We do want to use CL-INTERPOL:INTERPOL-READER too, because
+ ;; interpolating into replacement strings is useful.
+ (and second-delim
+ (loop
+ with escaped and depth = 0
+ and result = (make-array 1 :fill-pointer 1
+ :element-type 'character
+ ;; Use our own delimiter to
+ ;; ensure no regexp mode.
+ :initial-element #\#)
+ for next = (read-char stream t :eof t)
+ for extra-escape = (if escaped
+ (or (digit-char-p next)
+ (member next '(#\` #\& #\')))
+ (eql next #\#))
+ if escaped do (setq escaped nil)
+ else do (switch (next)
+ (second-delim (if second-closer
+ (incf depth)
+ (loop-finish)))
+ (second-closer (if (zerop depth)
+ (loop-finish)
+ (decf depth)))
+ (#\\ (setq escaped t)))
+ when extra-escape
+ do (vector-push-extend #\\ result)
+ do (vector-push-extend next result)
+ finally (vector-push-extend #\# result)
+ (return (cl-interpol:interpol-reader
+ (make-string-input-stream result)
+ #\? nil)))))
+ (modes (loop for next = (read-char stream t :eof t)
+ while (alpha-char-p next) collect next
+ finally (unread-char next stream)))
+ (scanner-args
+ (list first-arg
+ :case-insensitive-mode (and (find #\i modes) t)
+ :multi-line-mode (and (find #\m modes) t)
+ :single-line-mode (and (find #\s modes) t)
+ ;; We're choosing not to use CL-INTERPOL's own extended
+ ;; mode because CL-PPCRE's is closer to Perl's.
+ :extended-mode (and (find #\x modes) t)))
+ (scanner (if (constantp first-arg)
+ `(load-time-value
+ (re:create-scanner ,@scanner-args))
+ `(re:create-scanner ,@scanner-args)))
+ (body
+ (ecase op
+ (#\m
+ (cond ((member #\g modes)
+ `(re:all-matches-as-strings ,scanner target-string))
+ ((not arg)
+ ;; The number of capture groups is constant if
+ ;; FIRST-ARG is constant, so could we self-replace
+ ;; on first execution / otherwise memoise?
+ `(multiple-value-bind (zeroth rest)
+ (re:scan-to-strings ,scanner target-string)
+ ;; We could (coerce rest 'list) for use with
+ ;; DESTRUCTURING-BIND. But there is already
+ ;; CL-PPCRE:REGISTER-GROUPS-BIND.
+ (if (zerop (length rest)) zeroth rest)))
+ ((zerop arg)
+ `(re:scan-to-strings ,scanner target-string))
+ (t ; ARG is a positive integer
+ `(aand
+ (nth-value 1 (re:scan-to-strings ,scanner
+ target-string))
+ (values (aref it ,(1- arg)) it)))))
+ (#\s `(,(if (member #\g modes)
+ 're:regex-replace-all
+ 're:regex-replace)
+ ,scanner target-string ,second-arg))
+ (#\y (readerr "Transliteration unimplemented.")))))
+ `(lambda (target-string)
+ ,(if negated `(not ,body) body))))))
+
(named-readtables:defreadtable :consfigurator
(:merge :standard)
(:dispatch-macro-char #\# #\? #'cl-interpol:interpol-reader)
- (:dispatch-macro-char #\# #\> #'read-heredoc))
+ (:dispatch-macro-char #\# #\> #'read-heredoc)
+
+ (:dispatch-macro-char #\# #\~ #'perl-tilde-reader)
+ (:dispatch-macro-char #\# #\! #'perl-tilde-reader))
(named-readtables:defreadtable :consfigurator.without-read-eval
(:merge :consfigurator)