hstow: reduce number of ls(1), dirname(1), rm(1) and rmdir(1) calls

Also improve comment: 'xargs readlink' can be used like 'xargs ls -ld'.
author: Sean Whitton <spwhitton@spwhitton.name> 2023-01-21 10:36:23 -0700
committer: Sean Whitton <spwhitton@spwhitton.name> 2023-01-27 15:16:03 -0700
commit: e44af5d89c4d34591f2dfb4a208756db6f2c24d9 (patch)
tree: 1cdc401d1a3de586e5c2cd223232b6c49adfa550 /bin/hstow
parent: 9d9d6c00d461f554fa5b988d20e69f47ffb6bdcf (diff)
download: dotfiles-e44af5d89c4d34591f2dfb4a208756db6f2c24d9.tar.gz
1 files changed, 58 insertions, 42 deletions
diff --git a/bin/hstow b/bin/hstow
index a2cda67a..835e2a84 100755
--- a/bin/hstow
+++ b/bin/hstow
@@ -36,33 +36,48 @@ export LC_ALL=C
 tab="$(printf '\t')"
 cchars="$(printf '*[\001-\037\177]*')"
 
-# We always bypass readlink(1), even if it's installed, for speed.
-#
-# This is a safe parse of ls(1) output given its POSIX specification.
-# The long regexp is because only BREs have backreferences in POSIX.
-#
-# We want the ninth field in the output of ls(1) in awk's sense --
-# i.e. collapsing runs of multiple blanks between the first nine fields
-# -- but we mustn't collapse blanks from there onwards.
+. ~/src/dotfiles/lib-src/posix-defuns.sh
+
+# readlink(1) is not POSIX, so we require an alternative.
+# For simplicity, always use this even if readlink(1) is installed.
 readlinks () {
-    local restc=$1 assume="${2:-false}" first= rest= ls=
+    local restc=$1 assume="${2:-false}" first= rest=
+    local fields_temp="$(mkstemp)" targets_temp="$(mkstemp)"
+    # This first pipeline involves a safe parse of ls(1) output given
+    # its POSIX specification.
+    # We want the ninth field onwards in the output of ls(1) in awk's
+    # sense -- i.e. collapsing runs of multiple blanks between the first
+    # nine fields -- but we mustn't collapse blanks from there onwards.
+    exec 3>&1
     while read -r first rest; do
+	[ -n "$rest" ] && rest="$tab$rest"
 	if $assume || [ -h "$first" ]; then
-	    ls="$(ls -ld "$first")"
-	    case "$ls" in
-		$cchars)
-		    printf \
-	    'hstow: skipping %s due to control chars in link target\n' \
-			"$first" >&2
-		    continue
-		    ;;
-	    esac
-	    [ -n "$rest" ] && rest="$rest$tab$ls" || rest=$ls
+	    echo "$first"
+	    echo "$first$rest" >>"$fields_temp"
+	else
+	    echo "$first$rest" >&3
 	fi
-	printf '%s\t%s\n' "$first" "$rest"
-    done | sed 's/^\([^\t]\{1,\}\)\t\('\
+    done \
+	| awk '{ gsub(/"/, "\"'"'"'\"'"'"'\"")
+		 printf "\"%s\"\n", $0 }' | xargs -E '' -- ls -ld \
+	| sed -E 's/^([^[:blank:]]+[[:blank:]]+){8}//; /^\.$/d' \
+	| sort -o "$targets_temp" # undo how ls(1) sorts non-dirs first
+    sort -t"$tab" -k1 -o "$fields_temp" "$fields_temp"
+
+    if [ "$(wc -l <"$fields_temp")" \
+	     -ne "$(wc -l <"$targets_temp")" ]; then
+	rm "$fields_temp" "$targets_temp"
+	fail "line counts unequal; are there newlines in link targets?"
+    fi
+
+    # Only BREs have backreferences in POSIX.
+    paste "$fields_temp" "$targets_temp" \
+	| sed 's/^\([^\t]\{1,\}\)\t\('\
 "$(printf "%${restc}s" | sed 's/ /[^\\t]\\{1,\\}\\t/g')"\
-'\)\([^[:blank:]]\{1,\}[[:blank:]]\{1,\}\)\{8\}\1 -> /\1\t\2/'
+'\)\1 -> /\1\t\2/' \
+	| awk -F'\t' 'NF == '$((2+$restc))' && $NF !~ /[\001-\037\177]/'
+
+    rm "$fields_temp" "$targets_temp"
 }
 
 disjoin_file () {
@@ -111,9 +126,6 @@ stow1 () {
 	&& adoptp=1 \
 	|| adoptp="rel ~ /^($(disjoin_file .hstow-always-adopt "%s"))/"
 
-    # We have a separate pipeline element to find the link targets
-    # because with awk's system() we'd have to start a shell process for
-    # every record, in addition to the separate ls(1) process.
     find . ! -name . ! -type d ! -name "$cchars" \
 	 ! -name .gitignore \
 	 ! -name .hstow-local-ignore \
@@ -158,26 +170,30 @@ unstow () {
     cd "$HOME"
 
     # For speed, skip directories into which we'll never stow anything.
-    ignores="$(disjoin_file .hstow-unstow-ignore "./%s/")"
+    ignores="$(disjoin_file .hstow-unstow-ignore "./%s")"
 
     dir_pat=".$(echo $DIR | cut -c$(echo $HOME | wc -m | tr -d ' ')-)/"
     dirs_pat="$(echo "^($dir_pat|$ignores)" | sed -e 's#\.#\\.#g')"
-    for file in \
-	$(find . ! \( -user "$(id -un)" -o -group "$(id -gn)" \) \
-	       -prune -o -type l ! -name . ! -name "$cchars" -print \
-	      | grep -Ev "$dirs_pat" | readlinks 0 true | awk -F'\t' \
-		'$2 ~ /^(\.\.\/)*\.STOW\/'"$NAME"'\// { print $1 }'); do
-	rm "$file"
-	while true; do
-	    file="$(dirname $file)"
-	    [ "$file" = . ] && break
-	    if [ -z "$(dir_contents $file)" ]; then
-		rmdir "$file"
-	    else
-		break
-	    fi
-	done
-    done
+    # awk's close() calls pclose(3): all links are deleted before rmdir(1) runs.
+    # POSIX.1 "Utility Description Defaults", "Consequences of Errors"
+    # implies that should rmdir(1) encounter a non-empty directory, it
+    # should proceed to attempt to remove the next operand.
+    # Thus, here, -p means that we do not need to sort the operands.
+    # We don't know the code with which rmdir(1) will exit, and if it is
+    # 255 then xargs will give up.  So we wrap in a call to sh -c.
+    find . ! \( -user "$(id -un)" -o -group "$(id -gn)" \) \
+	 -prune -o -type l ! -name . ! -name "$cchars" -print \
+	| grep -Ev "$dirs_pat" | readlinks 0 true \
+	| awk -F'\t' -vOFS='\t' '$2 ~ /^(\.\.\/)*\.STOW\/'"$NAME"'\// \
+{ gsub(/"/, "\"'"'"'\"'"'"'\"", $1)
+  printf "\"%s\"\n", $1 | "xargs -E '' -- rm -f"
+  sub(/^.\//, "", $1)
+  sub(/\/?[^/]+$/, "", $1)
+  if ($1) dirs[++i] = $1
+} END { close("xargs -E '' -- rm -f")
+	for (d in dirs) printf "\"%s\"\n", dirs[d]
+}' | xargs -E '' -- sh -c 'rmdir -p -- "$@" 2>/dev/null ||:' --
+
     [ -e "$HOME/.STOW/$NAME" ] && rm "$HOME/.STOW/$NAME"
     if [ -d "$HOME/.STOW" ] \
 	   && [ "$(dir_contents $HOME/.STOW)" = "./.stow" ]; then
author	Sean Whitton <spwhitton@spwhitton.name>	2023-01-21 10:36:23 -0700
committer	Sean Whitton <spwhitton@spwhitton.name>	2023-01-27 15:16:03 -0700
commit	e44af5d89c4d34591f2dfb4a208756db6f2c24d9 (patch)
tree	1cdc401d1a3de586e5c2cd223232b6c49adfa550 /bin/hstow
parent	9d9d6c00d461f554fa5b988d20e69f47ffb6bdcf (diff)
download	dotfiles-e44af5d89c4d34591f2dfb4a208756db6f2c24d9.tar.gz