From 9d9d6c00d461f554fa5b988d20e69f47ffb6bdcf Mon Sep 17 00:00:00 2001
From: Sean Whitton
Date: Thu, 19 Jan 2023 10:50:19 -0700
Subject: hstow: speed up stowing by using more streaming

Extend the dot[-.] feature to intermediate pathname components.

We no longer match $path_dir against the ignore patterns.
I can't see how that was useful.
---
 bin/hstow | 106 +++++++++++++++++++++++++++++---------------------------------
 1 file changed, 50 insertions(+), 56 deletions(-)

(limited to 'bin/hstow')

diff --git a/bin/hstow b/bin/hstow
index 7aba04de..a2cda67a 100755
--- a/bin/hstow
+++ b/bin/hstow
@@ -36,22 +36,14 @@ export LC_ALL=C
 tab="$(printf '\t')"
 cchars="$(printf '*[\001-\037\177]*')"

-if ! command -v readlink >/dev/null; then
-    readlink () {
-        # Safe parse of ls(1) output given its POSIX specification.
-        #
-        # We want the ninth field onwards in awk's sense --
-        # i.e. collapsing multiple runs of blanks between the first nine
-        # fields -- but we mustn't collapse blanks from there onwards.
-        n=$((3 + $(echo "$1" | wc -m)))
-        ls -ld "$1" \
-            | sed -E \
-                  "s/^[[:blank:]]*([^[:blank:]]+[[:blank:]]+){8}.{$n}//"
-    }
-fi
-
-# Here we unconditionally bypass readlink(1), for speed.
+# We always bypass readlink(1), even if it's installed, for speed.
+#
+# This is a safe parse of ls(1) output given its POSIX specification.
 # The long regexp is because only BREs have backreferences in POSIX.
+#
+# We want the ninth field in the output of ls(1) in awk's sense --
+# i.e. collapsing runs of multiple blanks between the first nine fields
+# -- but we mustn't collapse blanks from there onwards.
 readlinks () {
     local restc=$1 assume="${2:-false}" first= rest= ls=
     while read -r first rest; do
@@ -73,11 +65,11 @@ readlinks () {
 '\)\([^[:blank:]]\{1,\}[[:blank:]]\{1,\}\)\{8\}\1 -> /\1\t\2/'
 }

-disjoin_hstow_file () {
+disjoin_file () {
     if [ -e "$DIR/$1" ]; then
         while read -r line; do
             [ -n "$line" ] && printf "|$2" "$line"
-        done <"$DIR/$1" | cut -c2-
+        done <"$DIR/$1" | sed 's#^.##; s#/#\\/#g'
     fi
 }

@@ -100,50 +92,56 @@ stow () {
     [ -f "$HOME/.STOW/.stow" ] || touch "$HOME/.STOW/.stow"
     [ -h "$HOME/.STOW/$NAME" ] \
         || ( cd "$HOME/.STOW"; ln -s "$DIR" "$NAME" )
-    conflicts=
-    ignores="$(disjoin_hstow_file .hstow-local-ignore "./%s")"
+
+    conflicts="$(stow1)"
+    [ -z "$conflicts" ] && return
+    echo >&2 "hstow: encountered conflicts:"
+    for conflict in $conflicts; do echo >&2 " $conflict"; done
+    exit 127
+}
+
+stow1 () {
+    ignores="$(disjoin_file .hstow-local-ignore "./%s")"

     # Files that (i) always/often have their symlinks replaced with
     # regular files when applications access them; and (ii) we don't
     # ever want to edit the copy under $DIR directly, but only via the
     # link/copy under $HOME.
     $always_adopt \
-        || adoptions="$(disjoin_hstow_file .hstow-always-adopt "./%s")"
-
-    for file in $(find . ! -name . ! -type d ! -name "$cchars" \
-                       ! -name .gitignore \
-                       ! -name .hstow-local-ignore \
-                       ! -name .hstow-always-adopt \
-                       ! -name .hstow-unstow-ignore \
-                    | grep -v '^\./\.git/'); do
-        file_dir="$(dirname $file)"
-        if [ -n "$ignores" ]; then
-            eval case "'$file'" in "${ignores})" continue ";;" esac
-            eval case "'$file_dir'" in "${ignores})" continue ";;" esac
-        fi
-
-        rel="$(echo $file|sed -E 's#/dot[-.]([^/]+)#/.\1#g; s#^\./##')"
-        dotdotslashes="$(echo $rel|sed -E 's#[^/]*$##; s#[^/]+#..#g')"
-        target="${dotdotslashes}.STOW/$NAME/$rel"
-        link="$HOME/$rel"
-        link_target=
-        [ -h "$link" ] && link_target="$(readlink $link)"
-
-        [ "$target" = "$link_target" ] && continue
-
-        if [ ! -h "$link" -a ! -h "$file" -a -f "$link" ]; then
-            if $always_adopt \
-                    || ( [ -n "$adoptions" ] \
-                             && eval case "'$file'" in \
-                                     "${adoptions})" exit 0 ";;" \
-                                     "*)" exit 1 ";;" \
-                                     esac ); then
+        && adoptp=1 \
+        || adoptp="rel ~ /^($(disjoin_file .hstow-always-adopt "%s"))/"
+
+    # We have a separate pipeline element to find the link targets
+    # because with awk's system() we'd have to start a shell process for
+    # every record, in addition to the separate ls(1) process.
+    find . ! -name . ! -type d ! -name "$cchars" \
+         ! -name .gitignore \
+         ! -name .hstow-local-ignore \
+         ! -name .hstow-always-adopt \
+         ! -name .hstow-unstow-ignore \
+        | awk -F'\n' -vOFS='\t' '! /^(\.\/\.git\/|'"$ignores"')/ \
+{ rel = $1; gsub(/\/dot[-.]/, "/.", rel); gsub(/^\.\//, "", rel)
+  dotdotslashes = rel
+  sub(/[^\/]*$/, "", dotdotslashes)
+  gsub(/[^\/]+/, "..", dotdotslashes)
+  $2 = $1
+  $3 = $1; sub(/\/[^/]+$/, "", $3)
+  $1 = ENVIRON["HOME"] "/" rel
+  $4 = '"$adoptp"' ? "true" : "false"
+  $5 = dotdotslashes ".STOW/'"$NAME"'/" rel
+  print
+}' | readlinks 4 \
+        | while read -r link file file_dir adopt target link_target; do
+        if [ "$target" = "$link_target" ]; then
+            continue
+        elif [ -z "$link_target" -a ! -h "$file" -a -f "$link" ]; then
+            if $adopt; then
                 mv -f "$link" "$file"
                 ln -s "$target" "$link"
             else
-                conflicts="$conflicts${tab}$file"
+                echo "$file" # into $conflicts
             fi
-        elif [ -h "$link" ]; then
+        elif [ -n "$link_target" ]; then
             # With at least GNU ln(1), passing -f, but not also -T, does
             # not replace an existing link in some cases.
             # -T is not POSIX, so we remove any existing link first.
@@ -154,17 +152,13 @@ stow () {
             ln -s "$target" "$link"
         fi
     done
-    [ -z "$conflicts" ] && return
-    echo >&2 "hstow: encountered conflicts:"
-    for conflict in $conflicts; do echo >&2 " $conflict"; done
-    exit 127
 }

 unstow () {
     cd "$HOME"

     # For speed, skip directories into which we'll never stow anything.
-    ignores="$(disjoin_hstow_file .hstow-unstow-ignore "./%s/")"
+    ignores="$(disjoin_file .hstow-unstow-ignore "./%s/")"
     dir_pat=".$(echo $DIR | cut -c$(echo $HOME | wc -m | tr -d ' ')-)/"
     dirs_pat="$(echo "^($dir_pat|$ignores)" | sed -e 's#\.#\\.#g')"

-- 
cgit v1.2.3