summary refs log tree commit diff
path: root/bin/hstow
diff options
context:
space:
mode:
author Sean Whitton <spwhitton@spwhitton.name> 2023-01-21 10:36:23 -0700
committer Sean Whitton <spwhitton@spwhitton.name> 2023-01-27 15:16:03 -0700
commit e44af5d89c4d34591f2dfb4a208756db6f2c24d9 (patch)
tree 1cdc401d1a3de586e5c2cd223232b6c49adfa550 /bin/hstow
parent 9d9d6c00d461f554fa5b988d20e69f47ffb6bdcf (diff)
download dotfiles-e44af5d89c4d34591f2dfb4a208756db6f2c24d9.tar.gz
hstow: reduce number of ls(1), dirname(1), rm(1) and rmdir(1) calls
Also improve comment: 'xargs readlink' can be used like 'xargs ls -ld'.
Diffstat (limited to 'bin/hstow')
-rwxr-xr-x bin/hstow 100
1 files changed, 58 insertions, 42 deletions
diff --git a/bin/hstow b/bin/hstow
index a2cda67a..835e2a84 100755
--- a/bin/hstow
+++ b/bin/hstow
@@ -36,33 +36,48 @@ export LC_ALL=C
tab="$(printf '\t')"
cchars="$(printf '*[\001-\037\177]*')"
-# We always bypass readlink(1), even if it's installed, for speed.
-#
-# This is a safe parse of ls(1) output given its POSIX specification.
-# The long regexp is because only BREs have backreferences in POSIX.
-#
-# We want the ninth field in the output of ls(1) in awk's sense --
-# i.e. collapsing runs of multiple blanks between the first nine fields
-# -- but we mustn't collapse blanks from there onwards.
+. ~/src/dotfiles/lib-src/posix-defuns.sh
+
+# readlink(1) is not POSIX, so we require an alternative.
+# For simplicity, always use this even if readlink(1) is installed.
readlinks () {
- local restc=$1 assume="${2:-false}" first= rest= ls=
+ local restc=$1 assume="${2:-false}" first= rest=
+ local fields_temp="$(mkstemp)" targets_temp="$(mkstemp)"
+ # This first pipeline involves a safe parse of ls(1) output given
+ # its POSIX specification.
+ # We want the ninth field onwards in the output of ls(1) in awk's
+ # sense -- i.e. collapsing runs of multiple blanks between the first
+ # nine fields -- but we mustn't collapse blanks from there onwards.
+ exec 3>&1
while read -r first rest; do
+ [ -n "$rest" ] && rest="$tab$rest"
if $assume || [ -h "$first" ]; then
- ls="$(ls -ld "$first")"
- case "$ls" in
- $cchars)
- printf \
- 'hstow: skipping %s due to control chars in link target\n' \
- "$first" >&2
- continue
- ;;
- esac
- [ -n "$rest" ] && rest="$rest$tab$ls" || rest=$ls
+ echo "$first"
+ echo "$first$rest" >>"$fields_temp"
+ else
+ echo "$first$rest" >&3
fi
- printf '%s\t%s\n' "$first" "$rest"
- done | sed 's/^\([^\t]\{1,\}\)\t\('\
+ done \
+ | awk '{ gsub(/"/, "\"'"'"'\"'"'"'\"")
+ printf "\"%s\"\n", $0 }' | xargs -E '' -- ls -ld \
+ | sed -E 's/^([^[:blank:]]+[[:blank:]]+){8}//; /^\.$/d' \
+ | sort -o "$targets_temp" # undo how ls(1) sorts non-dirs first
+ sort -t"$tab" -k1 -o "$fields_temp" "$fields_temp"
+
+ if [ "$(wc -l <"$fields_temp")" \
+ -ne "$(wc -l <"$targets_temp")" ]; then
+ rm "$fields_temp" "$targets_temp"
+ fail "line counts unequal; are there newlines in link targets?"
+ fi
+
+ # Only BREs have backreferences in POSIX.
+ paste "$fields_temp" "$targets_temp" \
+ | sed 's/^\([^\t]\{1,\}\)\t\('\
"$(printf "%${restc}s" | sed 's/ /[^\\t]\\{1,\\}\\t/g')"\
-'\)\([^[:blank:]]\{1,\}[[:blank:]]\{1,\}\)\{8\}\1 -> /\1\t\2/'
+'\)\1 -> /\1\t\2/' \
+ | awk -F'\t' 'NF == '$((2+$restc))' && $NF !~ /[\001-\037\177]/'
+
+ rm "$fields_temp" "$targets_temp"
}
disjoin_file () {
@@ -111,9 +126,6 @@ stow1 () {
&& adoptp=1 \
|| adoptp="rel ~ /^($(disjoin_file .hstow-always-adopt "%s"))/"
- # We have a separate pipeline element to find the link targets
- # because with awk's system() we'd have to start a shell process for
- # every record, in addition to the separate ls(1) process.
find . ! -name . ! -type d ! -name "$cchars" \
! -name .gitignore \
! -name .hstow-local-ignore \
@@ -158,26 +170,30 @@ unstow () {
cd "$HOME"
# For speed, skip directories into which we'll never stow anything.
- ignores="$(disjoin_file .hstow-unstow-ignore "./%s/")"
+ ignores="$(disjoin_file .hstow-unstow-ignore "./%s")"
dir_pat=".$(echo $DIR | cut -c$(echo $HOME | wc -m | tr -d ' ')-)/"
dirs_pat="$(echo "^($dir_pat|$ignores)" | sed -e 's#\.#\\.#g')"
- for file in \
- $(find . ! \( -user "$(id -un)" -o -group "$(id -gn)" \) \
- -prune -o -type l ! -name . ! -name "$cchars" -print \
- | grep -Ev "$dirs_pat" | readlinks 0 true | awk -F'\t' \
- '$2 ~ /^(\.\.\/)*\.STOW\/'"$NAME"'\// { print $1 }'); do
- rm "$file"
- while true; do
- file="$(dirname $file)"
- [ "$file" = . ] && break
- if [ -z "$(dir_contents $file)" ]; then
- rmdir "$file"
- else
- break
- fi
- done
- done
+ # awk's close() calls pclose(3), completing all the link deletions.
+ # POSIX.1 "Utility Description Defaults", "Consequences of Errors"
+ # implies that should rmdir(1) encounter a non-empty directory, it
+ # should proceed to attempt to remove the next operand.
+ # Thus, here, -p means that we do not need to sort the operands.
+ # We don't know the code with which rmdir(1) will exit, and if it is
+ # 255 then xargs will give up. So we wrap in a call to sh -c.
+ find . ! \( -user "$(id -un)" -o -group "$(id -gn)" \) \
+ -prune -o -type l ! -name . ! -name "$cchars" -print \
+ | grep -Ev "$dirs_pat" | readlinks 0 true \
+ | awk -F'\t' -vOFS='\t' '$2 ~ /^(\.\.\/)*\.STOW\/'"$NAME"'\// \
+{ gsub(/"/, "\"'"'"'\"'"'"'\"", $1)
+ printf "\"%s\"\n", $1 | "xargs -E '' -- rm -f"
+ sub(/^.\//, "", $1)
+ sub(/\/?[^/]+$/, "", $1)
+ if ($1) dirs[++i] = $1
+} END { close("xargs -E '' -- rm -f")
+ for (d in dirs) printf "\"%s\"\n", dirs[d]
+}' | xargs -E '' -- sh -c 'rmdir -p -- "$@" 2>/dev/null ||:' --
+
[ -e "$HOME/.STOW/$NAME" ] && rm "$HOME/.STOW/$NAME"
if [ -d "$HOME/.STOW" ] \
&& [ "$(dir_contents $HOME/.STOW)" = "./.stow" ]; then