From ae5f2c02bd2fc269e2cc32c8039d95fbf4225e69 Mon Sep 17 00:00:00 2001 From: Mattias EngdegÄrd Date: Tue, 19 Mar 2024 13:03:47 +0100 Subject: New `sort` keyword arguments (bug#69709) Add the :key, :lessp, :reverse and :in-place keyword arguments. The old calling style remains available and is unchanged. * src/fns.c (sort_list, sort_vector, Fsort): * src/sort.c (tim_sort): Add keyword arguments with associated new features. All callers of Fsort adapted. * test/src/fns-tests.el (fns-tests--shuffle-vector, fns-tests-sort-kw): New test. * doc/lispref/sequences.texi (Sequence Functions): Update manual. * etc/NEWS: Announce. --- doc/lispref/sequences.texi | 131 +++++++++++++++++++++++---------------------- etc/NEWS | 25 +++++++++ src/dired.c | 2 +- src/fns.c | 92 ++++++++++++++++++++++++++----- src/lisp.h | 3 +- src/pdumper.c | 6 +-- src/sort.c | 14 ++--- test/src/fns-tests.el | 43 +++++++++++++++ 8 files changed, 229 insertions(+), 87 deletions(-) diff --git a/doc/lispref/sequences.texi b/doc/lispref/sequences.texi index 5bdf71fe02e..6322f17e77b 100644 --- a/doc/lispref/sequences.texi +++ b/doc/lispref/sequences.texi @@ -350,94 +350,99 @@ encouraged to treat strings as immutable even when they are mutable. @end defun -@defun sort sequence predicate +@defun sort sequence &rest keyword-args @cindex stable sort @cindex sorting lists @cindex sorting vectors -This function sorts @var{sequence} stably. Note that this function doesn't work -for all sequences; it may be used only for lists and vectors. If @var{sequence} -is a list, it is modified destructively. This functions returns the sorted -@var{sequence} and compares elements using @var{predicate}. A stable sort is -one in which elements with equal sort keys maintain their relative order before -and after the sort. Stability is important when successive sorts are used to -order elements according to different criteria. +This function sorts @var{sequence}, which must be a list or vector, and +returns a sorted sequence of the same type. +The sort is stable, which means that elements with equal sort keys maintain +their relative order. It takes the following optional keyword arguments: -The argument @var{predicate} must be a function that accepts two -arguments. It is called with two elements of @var{sequence}. To get an -increasing order sort, the @var{predicate} should return non-@code{nil} if the -first element is ``less'' than the second, or @code{nil} if not. +@table @asis +@item :key @var{keyfunc} +Use @var{keyfunc}, a function that takes a single element from +@var{sequence} and returns its key value, to generate the keys used in +comparison. If this argument is absent or if @var{keyfunc} is +@code{nil} then @code{identity} is assumed; that is, the elements +themselves are used as sorting keys. + +@item :lessp @var{predicate} +Use @var{predicate} to order the keys. @var{predicate} is a function +that takes two sort keys as arguments and returns non-@code{nil} if the +first should come before the second. If this argument is absent or +@var{predicate} is @code{nil}, then @code{value<} is used, which +is applicable to many different Lisp types and generally sorts in +ascending order (@pxref{definition of value<}). + +For consistency, any predicate must obey the following rules: +@itemize @bullet +@item +It must be @dfn{antisymmetric}: it cannot both order @var{a} before +@var{b} and @var{b} before @var{a}. +@item +It must be @dfn{transitive}: if it orders @var{a} before @var{b} and +@var{b} before @var{c}, then it must also order @var{a} before @var{c}. +@end itemize -The comparison function @var{predicate} must give reliable results for -any given pair of arguments, at least within a single call to -@code{sort}. It must be @dfn{antisymmetric}; that is, if @var{a} is -less than @var{b}, @var{b} must not be less than @var{a}. It must be -@dfn{transitive}---that is, if @var{a} is less than @var{b}, and @var{b} -is less than @var{c}, then @var{a} must be less than @var{c}. If you -use a comparison function which does not meet these requirements, the -result of @code{sort} is unpredictable. +@item :reverse @var{flag} +If @var{flag} is non-@code{nil}, the sorting order is reversed. With +the default @code{:lessp} predicate this means sorting in descending order. -The destructive aspect of @code{sort} for lists is that it reuses the -cons cells forming @var{sequence} by changing their contents, possibly -rearranging them in a different order. This means that the value of -the input list is undefined after sorting; only the list returned by -@code{sort} has a well-defined value. Example: +@item :in-place @var{flag} +If @var{flag} is non-@code{nil}, then @var{sequence} is sorted in-place +(destructively) and returned. If @code{nil}, or if this argument is not +given, a sorted copy of the input is returned and @var{sequence} itself +remains unmodified. In-place sorting is slightly faster, but the +original sequence is lost. +@end table + +If the default behaviour is not suitable for your needs, it is usually +easier and faster to supply a new @code{:key} function than a different +@code{:lessp} predicate. For example, consider sorting these strings: @example -@group -(setq nums (list 2 1 4 3 0)) -(sort nums #'<) - @result{} (0 1 2 3 4) - ; nums is unpredictable at this point -@end group +(setq numbers '("one" "two" "three" "four" "five" "six")) +(sort numbers) + @result{} ("five" "four" "one" "six" "three" "two") @end example -Most often we store the result back into the variable that held the -original list: +You can sort the strings by length instead by supplying a different key +function: @example -(setq nums (sort nums #'<)) +(sort numbers :key #'length) + @result{} ("one" "two" "six" "four" "five" "three") @end example -If you wish to make a sorted copy without destroying the original, -copy it first and then sort: +Note how strings of the same length keep their original order, thanks to +the sorting stability. Now suppose you want to sort by length, but use +the string contents to break ties. The easiest way is to specify a key +function that transforms an element to a value that is sorted this way. +Since @code{value<} orders compound objects (conses, lists, +vectors and records) lexicographically, you could do: @example -@group -(setq nums (list 2 1 4 3 0)) -(sort (copy-sequence nums) #'<) - @result{} (0 1 2 3 4) -@end group -@group -nums - @result{} (2 1 4 3 0) -@end group +(sort numbers :key (lambda (x) (cons (length x) x))) + @result{} ("one" "six" "two" "five" "four" "three") @end example -For the better understanding of what stable sort is, consider the following -vector example. After sorting, all items whose @code{car} is 8 are grouped -at the beginning of @code{vector}, but their relative order is preserved. -All items whose @code{car} is 9 are grouped at the end of @code{vector}, -but their relative order is also preserved: +because @code{(3 . "six")} is ordered before @code{(3 . "two")} and so on. + +For compatibility with old versions of Emacs, the @code{sort} function +can also be called using the fixed two-argument form @example -@group -(setq - vector - (vector '(8 . "xxx") '(9 . "aaa") '(8 . "bbb") '(9 . "zzz") - '(9 . "ppp") '(8 . "ttt") '(8 . "eee") '(9 . "fff"))) - @result{} [(8 . "xxx") (9 . "aaa") (8 . "bbb") (9 . "zzz") - (9 . "ppp") (8 . "ttt") (8 . "eee") (9 . "fff")] -@end group -@group -(sort vector (lambda (x y) (< (car x) (car y)))) - @result{} [(8 . "xxx") (8 . "bbb") (8 . "ttt") (8 . "eee") - (9 . "aaa") (9 . "zzz") (9 . "ppp") (9 . "fff")] -@end group +(@code{sort} @var{sequence} @var{predicate}) @end example + +where @var{predicate} is the @code{:lessp} argument. When using this +form, sorting is always done in-place. @end defun @cindex comparing values @cindex standard sorting order +@anchor{definition of value<} @defun value< a b This function returns non-@code{nil} if @var{a} comes before @var{b} in the standard sorting order; this means that it returns @code{nil} when diff --git a/etc/NEWS b/etc/NEWS index 73ffff9f2d3..4018df1fecb 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1770,6 +1770,31 @@ lexicographically. It is intended as a convenient ordering predicate for sorting, and is likely to be faster than hand-written Lisp functions. ++++ +** New 'sort' arguments and features. +The 'sort' function can now be called using the signature + + (sort SEQ &rest KEYWORD-ARGUMENTS) + +where arguments after the first are keyword/value pairs, all optional: +':key' specifies a function that produces the sorting key from an element, +':lessp' specifies the ordering predicate, defaulting to 'value<', +':reverse' is used to reverse the sorting order, +':in-place is used for in-place sorting, as the default is now to +sort a copy of the input. + +The new signature is less error-prone and reduces the need to write +ordering predicates by hand. We recommend that you use the ':key' +argument instead of ':lessp' unless a suitable ordering predicate is +already available. This can also be used for multi-key sorting: + + (sort seq :key (lambda (x) (list (age x) (size x) (cost x)))) + +sorts by the return value of 'age', then by 'size', then by 'cost'. + +The old signature, '(sort SEQ PREDICATE)', can still be used and sorts +its input in-place as before. + ** New function 'sort-on'. This function implements the Schwartzian transform, and is appropriate for sorting lists when the computation of the sort key of a list diff --git a/src/dired.c b/src/dired.c index 9a372201ae0..bfbacf70917 100644 --- a/src/dired.c +++ b/src/dired.c @@ -351,7 +351,7 @@ directory_files_internal (Lisp_Object directory, Lisp_Object full, specpdl_ptr = specpdl_ref_to_ptr (count); if (NILP (nosort)) - list = Fsort (Fnreverse (list), + list = CALLN (Fsort, Fnreverse (list), attrs ? Qfile_attributes_lessp : Qstring_lessp); (void) directory_volatile; diff --git a/src/fns.c b/src/fns.c index a3ef99f67a8..7eacf99cbba 100644 --- a/src/fns.c +++ b/src/fns.c @@ -2353,7 +2353,8 @@ See also the function `nreverse', which is used more often. */) is destructively reused to hold the sorted result. */ static Lisp_Object -sort_list (Lisp_Object list, Lisp_Object predicate, Lisp_Object keyfunc) +sort_list (Lisp_Object list, Lisp_Object predicate, Lisp_Object keyfunc, + bool reverse) { ptrdiff_t length = list_length (list); if (length < 2) @@ -2369,7 +2370,7 @@ sort_list (Lisp_Object list, Lisp_Object predicate, Lisp_Object keyfunc) result[i] = Fcar (tail); tail = XCDR (tail); } - tim_sort (predicate, keyfunc, result, length); + tim_sort (predicate, keyfunc, result, length, reverse); ptrdiff_t i = 0; tail = list; @@ -2388,27 +2389,86 @@ sort_list (Lisp_Object list, Lisp_Object predicate, Lisp_Object keyfunc) algorithm. */ static void -sort_vector (Lisp_Object vector, Lisp_Object predicate, Lisp_Object keyfunc) +sort_vector (Lisp_Object vector, Lisp_Object predicate, Lisp_Object keyfunc, + bool reverse) { ptrdiff_t length = ASIZE (vector); if (length < 2) return; - tim_sort (predicate, keyfunc, XVECTOR (vector)->contents, length); + tim_sort (predicate, keyfunc, XVECTOR (vector)->contents, length, reverse); } -DEFUN ("sort", Fsort, Ssort, 2, 2, 0, - doc: /* Sort SEQ, stably, comparing elements using PREDICATE. -Returns the sorted sequence. SEQ should be a list or vector. SEQ is -modified by side effects. PREDICATE is called with two elements of -SEQ, and should return non-nil if the first element should sort before -the second. */) - (Lisp_Object seq, Lisp_Object predicate) +DEFUN ("sort", Fsort, Ssort, 1, MANY, 0, + doc: /* Sort SEQ, stably, and return the sorted sequence. +SEQ should be a list or vector. +Optional arguments are specified as keyword/argument pairs. The following +arguments are defined: + +:key FUNC -- FUNC is a function that takes a single element from SEQ and + returns the key value to be used in comparison. If absent or nil, + `identity' is used. + +:lessp FUNC -- FUNC is a function that takes two arguments and returns + non-nil if the first element should come before the second. + If absent or nil, `value<' is used. + +:reverse BOOL -- if BOOL is non-nil, the sorting order implied by FUNC is + reversed. This does not affect stability: equal elements still retain + their order in the input sequence. + +:in-place BOOL -- if BOOL is non-nil, SEQ is sorted in-place and returned. + Otherwise, a sorted copy of SEQ is returned and SEQ remains unmodified; + this is the default. + +For compatibility, the calling convention (sort SEQ LESSP) can also be used; +in this case, sorting is always done in-place. + +usage: (sort SEQ &key KEY LESSP REVERSE IN-PLACE) */) + (ptrdiff_t nargs, Lisp_Object *args) { + Lisp_Object seq = args[0]; + Lisp_Object key = Qnil; + Lisp_Object lessp = Qnil; + bool inplace = false; + bool reverse = false; + if (nargs == 2) + { + /* old-style invocation without keywords */ + lessp = args[1]; + inplace = true; + } + else if ((nargs & 1) == 0) + error ("Invalid argument list"); + else + for (ptrdiff_t i = 1; i < nargs - 1; i += 2) + { + if (EQ (args[i], QCkey)) + key = args[i + 1]; + else if (EQ (args[i], QClessp)) + lessp = args[i + 1]; + else if (EQ (args[i], QCin_place)) + inplace = !NILP (args[i + 1]); + else if (EQ (args[i], QCreverse)) + reverse = !NILP (args[i + 1]); + else + signal_error ("Invalid keyword argument", args[i]); + } + + if (NILP (lessp)) + /* FIXME: normalise it as Qnil instead, and special-case it in tim_sort? + That would remove the funcall overhead for the common case. */ + lessp = Qvaluelt; + + /* FIXME: for lists it may be slightly faster to make the copy after + sorting? Measure. */ + if (!inplace) + seq = Fcopy_sequence (seq); + if (CONSP (seq)) - seq = sort_list (seq, predicate, Qnil); + seq = sort_list (seq, lessp, key, reverse); else if (VECTORP (seq)) - sort_vector (seq, predicate, Qnil); + sort_vector (seq, lessp, key, reverse); else if (!NILP (seq)) wrong_type_argument (Qlist_or_vector_p, seq); return seq; @@ -6860,4 +6920,10 @@ For best results this should end in a space. */); DEFSYM (Qfrom__tty_menu_p, "from--tty-menu-p"); DEFSYM (Qyes_or_no_p, "yes-or-no-p"); DEFSYM (Qy_or_n_p, "y-or-n-p"); + + DEFSYM (QCkey, ":key"); + DEFSYM (QClessp, ":lessp"); + DEFSYM (QCin_place, ":in-place"); + DEFSYM (QCreverse, ":reverse"); + DEFSYM (Qvaluelt, "value<"); } diff --git a/src/lisp.h b/src/lisp.h index 14c0b8e4d1c..6226ab33244 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -4299,7 +4299,8 @@ extern void syms_of_fns (void); extern void mark_fns (void); /* Defined in sort.c */ -extern void tim_sort (Lisp_Object, Lisp_Object, Lisp_Object *, const ptrdiff_t); +extern void tim_sort (Lisp_Object, Lisp_Object, Lisp_Object *, const ptrdiff_t, + bool); /* Defined in floatfns.c. */ verify (FLT_RADIX == 2 || FLT_RADIX == 16); diff --git a/src/pdumper.c b/src/pdumper.c index c7ebb38dea5..ac8bf6f31f4 100644 --- a/src/pdumper.c +++ b/src/pdumper.c @@ -3368,7 +3368,7 @@ dump_sort_copied_objects (struct dump_context *ctx) file and the copy into Emacs in-order, where prefetch will be most effective. */ ctx->copied_queue = - Fsort (Fnreverse (ctx->copied_queue), + CALLN (Fsort, Fnreverse (ctx->copied_queue), Qdump_emacs_portable__sort_predicate_copied); } @@ -3935,7 +3935,7 @@ drain_reloc_list (struct dump_context *ctx, { struct dump_flags old_flags = ctx->flags; ctx->flags.pack_objects = true; - Lisp_Object relocs = Fsort (Fnreverse (*reloc_list), + Lisp_Object relocs = CALLN (Fsort, Fnreverse (*reloc_list), Qdump_emacs_portable__sort_predicate); *reloc_list = Qnil; dump_align_output (ctx, max (alignof (struct dump_reloc), @@ -4057,7 +4057,7 @@ static void dump_do_fixups (struct dump_context *ctx) { dump_off saved_offset = ctx->offset; - Lisp_Object fixups = Fsort (Fnreverse (ctx->fixups), + Lisp_Object fixups = CALLN (Fsort, Fnreverse (ctx->fixups), Qdump_emacs_portable__sort_predicate); Lisp_Object prev_fixup = Qnil; ctx->fixups = Qnil; diff --git a/src/sort.c b/src/sort.c index d91993c8c65..a0f127c35b3 100644 --- a/src/sort.c +++ b/src/sort.c @@ -1072,11 +1072,11 @@ resolve_fun (Lisp_Object fun) } /* Sort the array SEQ with LENGTH elements in the order determined by - PREDICATE. */ - + PREDICATE (where Qnil means value<) and KEYFUNC (where Qnil means identity), + optionally reversed. */ void tim_sort (Lisp_Object predicate, Lisp_Object keyfunc, - Lisp_Object *seq, const ptrdiff_t length) + Lisp_Object *seq, const ptrdiff_t length, bool reverse) { /* FIXME: optimise for the predicate being value<; at the very least we'd go without the Lisp funcall overhead. */ @@ -1091,9 +1091,8 @@ tim_sort (Lisp_Object predicate, Lisp_Object keyfunc, if (EQ (keyfunc, Qidentity)) keyfunc = Qnil; - /* FIXME: consider a built-in reverse sorting flag: we would reverse - the input in-place here and reverse it back just before - returning. */ + if (reverse) + reverse_slice (seq, seq + length); /* preserve stability */ if (NILP (keyfunc)) { @@ -1159,6 +1158,9 @@ tim_sort (Lisp_Object predicate, Lisp_Object keyfunc, eassume (ms.pending[0].len == length); lo = ms.pending[0].base; + if (reverse) + reverse_slice (seq, seq + length); + if (ms.a.keys != ms.temparray || allocated_keys != NULL) unbind_to (ms.count, Qnil); } diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el index 844000cdc76..1b13785a9fc 100644 --- a/test/src/fns-tests.el +++ b/test/src/fns-tests.el @@ -375,6 +375,49 @@ (should (equal (should-error (sort "cba" #'<) :type 'wrong-type-argument) '(wrong-type-argument list-or-vector-p "cba")))) +(defun fns-tests--shuffle-vector (vect) + "Shuffle VECT in place." + (let ((n (length vect))) + (dotimes (i (1- n)) + (let* ((j (+ i (random (- n i)))) + (vi (aref vect i))) + (aset vect i (aref vect j)) + (aset vect j vi))))) + +(ert-deftest fns-tests-sort-kw () + ;; Test the `sort' keyword calling convention by comparing with + ;; the results from using the old (positional) style tested above. + (random "my seed") + (dolist (size '(0 1 2 3 10 100 1000)) + ;; Use a vector with both positive and negative numbers (asymmetric). + (let ((numbers (vconcat + (number-sequence (- (/ size 3)) (- size 1 (/ size 3)))))) + (fns-tests--shuffle-vector numbers) + ;; Test both list and vector input. + (dolist (input (list (append numbers nil) numbers)) + (dolist (in-place '(nil t)) + (dolist (reverse '(nil t)) + (dolist (key '(nil abs)) + (dolist (lessp '(nil >)) + (let* ((seq (copy-sequence input)) + (res (sort seq :key key :lessp lessp + :in-place in-place :reverse reverse)) + (pred (or lessp #'value<)) + (exp-in (copy-sequence input)) + (exp-out + (sort (if reverse (reverse exp-in) exp-in) + (if key + (lambda (a b) + (funcall pred + (funcall key a) (funcall key b))) + pred))) + (expected (if reverse (reverse exp-out) exp-out))) + (should (equal res expected)) + (if in-place + (should (eq res seq)) + (should-not (and (> size 0) (eq res seq))) + (should (equal seq input)))))))))))) + (defvar w32-collate-ignore-punctuation) (ert-deftest fns-tests-collate-sort () -- cgit v1.2.3