From e44af5d89c4d34591f2dfb4a208756db6f2c24d9 Mon Sep 17 00:00:00 2001 From: Sean Whitton Date: Sat, 21 Jan 2023 10:36:23 -0700 Subject: hstow: reduce number of ls(1), dirname(1), rm(1) and rmdir(1) calls Also improve comment: 'xargs readlink' can be used like 'xargs ls -ld'. --- bin/hstow | 100 ++++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 58 insertions(+), 42 deletions(-) (limited to 'bin/hstow') diff --git a/bin/hstow b/bin/hstow index a2cda67a..835e2a84 100755 --- a/bin/hstow +++ b/bin/hstow @@ -36,33 +36,48 @@ export LC_ALL=C tab="$(printf '\t')" cchars="$(printf '*[\001-\037\177]*')" -# We always bypass readlink(1), even if it's installed, for speed. -# -# This is a safe parse of ls(1) output given its POSIX specification. -# The long regexp is because only BREs have backreferences in POSIX. -# -# We want the ninth field in the output of ls(1) in awk's sense -- -# i.e. collapsing runs of multiple blanks between the first nine fields -# -- but we mustn't collapse blanks from there onwards. +. ~/src/dotfiles/lib-src/posix-defuns.sh + +# readlink(1) is not POSIX, so we require an alternative. +# For simplicity, always use this even if readlink(1) is installed. readlinks () { - local restc=$1 assume="${2:-false}" first= rest= ls= + local restc=$1 assume="${2:-false}" first= rest= + local fields_temp="$(mkstemp)" targets_temp="$(mkstemp)" + # This first pipeline involves a safe parse of ls(1) output given + # its POSIX specification. + # We want the ninth field onwards in the output of ls(1) in awk's + # sense -- i.e. collapsing runs of multiple blanks between the first + # nine fields -- but we mustn't collapse blanks from there onwards. + exec 3>&1 while read -r first rest; do + [ -n "$rest" ] && rest="$tab$rest" if $assume || [ -h "$first" ]; then - ls="$(ls -ld "$first")" - case "$ls" in - $cchars) - printf \ - 'hstow: skipping %s due to control chars in link target\n' \ - "$first" >&2 - continue - ;; - esac - [ -n "$rest" ] && rest="$rest$tab$ls" || rest=$ls + echo "$first" + echo "$first$rest" >>"$fields_temp" + else + echo "$first$rest" >&3 fi - printf '%s\t%s\n' "$first" "$rest" - done | sed 's/^\([^\t]\{1,\}\)\t\('\ + done \ + | awk '{ gsub(/"/, "\"'"'"'\"'"'"'\"") + printf "\"%s\"\n", $0 }' | xargs -E '' -- ls -ld \ + | sed -E 's/^([^[:blank:]]+[[:blank:]]+){8}//; /^\.$/d' \ + | sort -o "$targets_temp" # undo how ls(1) sorts non-dirs first + sort -t"$tab" -k1 -o "$fields_temp" "$fields_temp" + + if [ "$(wc -l <"$fields_temp")" \ + -ne "$(wc -l <"$targets_temp")" ]; then + rm "$fields_temp" "$targets_temp" + fail "line counts unequal; are there newlines in link targets?" + fi + + # Only BREs have backreferences in POSIX. + paste "$fields_temp" "$targets_temp" \ + | sed 's/^\([^\t]\{1,\}\)\t\('\ "$(printf "%${restc}s" | sed 's/ /[^\\t]\\{1,\\}\\t/g')"\ -'\)\([^[:blank:]]\{1,\}[[:blank:]]\{1,\}\)\{8\}\1 -> /\1\t\2/' +'\)\1 -> /\1\t\2/' \ + | awk -F'\t' 'NF == '$((2+$restc))' && $NF !~ /[\001-\037\177]/' + + rm "$fields_temp" "$targets_temp" } disjoin_file () { @@ -111,9 +126,6 @@ stow1 () { && adoptp=1 \ || adoptp="rel ~ /^($(disjoin_file .hstow-always-adopt "%s"))/" - # We have a separate pipeline element to find the link targets - # because with awk's system() we'd have to start a shell process for - # every record, in addition to the separate ls(1) process. find . ! -name . ! -type d ! -name "$cchars" \ ! -name .gitignore \ ! -name .hstow-local-ignore \ @@ -158,26 +170,30 @@ unstow () { cd "$HOME" # For speed, skip directories into which we'll never stow anything. - ignores="$(disjoin_file .hstow-unstow-ignore "./%s/")" + ignores="$(disjoin_file .hstow-unstow-ignore "./%s")" dir_pat=".$(echo $DIR | cut -c$(echo $HOME | wc -m | tr -d ' ')-)/" dirs_pat="$(echo "^($dir_pat|$ignores)" | sed -e 's#\.#\\.#g')" - for file in \ - $(find . ! \( -user "$(id -un)" -o -group "$(id -gn)" \) \ - -prune -o -type l ! -name . ! -name "$cchars" -print \ - | grep -Ev "$dirs_pat" | readlinks 0 true | awk -F'\t' \ - '$2 ~ /^(\.\.\/)*\.STOW\/'"$NAME"'\// { print $1 }'); do - rm "$file" - while true; do - file="$(dirname $file)" - [ "$file" = . ] && break - if [ -z "$(dir_contents $file)" ]; then - rmdir "$file" - else - break - fi - done - done + # awk's close() calls pclose(3), completing all the link deletions. + # POSIX.1 "Utility Description Defaults", "Consequences of Errors" + # implies that should rmdir(1) encounter a non-empty directory, it + # should proceed to attempt to remove the next operand. + # Thus, here, -p means that we do not need to sort the operands. + # We don't know the code with which rmdir(1) will exit, and if it is + # 255 then xargs will give up. So we wrap in a call to sh -c. + find . ! \( -user "$(id -un)" -o -group "$(id -gn)" \) \ + -prune -o -type l ! -name . ! -name "$cchars" -print \ + | grep -Ev "$dirs_pat" | readlinks 0 true \ + | awk -F'\t' -vOFS='\t' '$2 ~ /^(\.\.\/)*\.STOW\/'"$NAME"'\// \ +{ gsub(/"/, "\"'"'"'\"'"'"'\"", $1) + printf "\"%s\"\n", $1 | "xargs -E '' -- rm -f" + sub(/^.\//, "", $1) + sub(/\/?[^/]+$/, "", $1) + if ($1) dirs[++i] = $1 +} END { close("xargs -E '' -- rm -f") + for (d in dirs) printf "\"%s\"\n", dirs[d] +}' | xargs -E '' -- sh -c 'rmdir -p -- "$@" 2>/dev/null ||:' -- + [ -e "$HOME/.STOW/$NAME" ] && rm "$HOME/.STOW/$NAME" if [ -d "$HOME/.STOW" ] \ && [ "$(dir_contents $HOME/.STOW)" = "./.stow" ]; then -- cgit v1.2.3