summaryrefslogtreecommitdiff
path: root/lib-src
diff options
context:
space:
mode:
author: Sean Whitton <spwhitton@spwhitton.name> 2023-01-29 09:41:46 -0700
committer: Sean Whitton <spwhitton@spwhitton.name> 2023-01-29 09:42:22 -0700
commit: ac6e348c052820fafd569549622246b4d238a231 (patch)
tree: 3be826b70dfe1c9b669672b860b602d2b457268d /lib-src
parent: 7df546cc70711d89e76417a7b5c4b14597d21608 (diff)
download: dotfiles-ac6e348c052820fafd569549622246b4d238a231.tar.gz
hstow: use globs in .hstow-always-adopt & convert to EREs for awk
Diffstat (limited to 'lib-src')
-rw-r--r-- lib-src/globs2ere.awk 83
1 files changed, 83 insertions, 0 deletions
diff --git a/lib-src/globs2ere.awk b/lib-src/globs2ere.awk
new file mode 100644
index 00000000..a7038800
--- /dev/null
+++ b/lib-src/globs2ere.awk
@@ -0,0 +1,83 @@
+# Copyright (C) 2023 Sean Whitton <spwhitton@spwhitton.name>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or (at
+# your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# Where each input record is a glob, output a single ERE matching the
+# disjunction of all the non-empty input records.
+# This is for matching, not expansion: '/' and '.' are not treated specially.
+# There is no shell quotation removal, and we do not yet support collating
+# symbols or equivalence classes within bracket expressions.
+# There is no input validation.
+
+# Make the field separator equal to the record separator, so that each
+# record consists of exactly one field.
+BEGIN {
+    FS = RS
+}
+
+# Step the global index i forward by one and load the character of $0 at
+# that position into the global c.
+function getchar () {
+    i++
+    c = substr($0, i, 1)
+}
+
+# For each non-empty record, translate the glob in $0 into an ERE and store
+# it in all[++j].
+# res[1..rl] collects the pieces of the ERE being built; i is the index in $0
+# of the character most recently loaded into c by getchar().
+length {
+    res[++rl] = "^"
+    while (i < length) {
+        getchar()
+        if (c == "*") {
+            # A leading '*' means the ERE is simply not anchored at the
+            # start, so drop the "^".  rl is also 1 after e.g. "*a", so make
+            # sure res[1] really is still the anchor before dropping it.
+            # When rl is 0 the ERE is already unanchored and empty, so a
+            # further '*' adds nothing.
+            if (rl == 1 && res[1] == "^")
+                rl--
+            else if (rl > 0 && res[rl] != ".*")
+                res[++rl] = ".*"
+        } else if (c == "?")
+            res[++rl] = "."
+        else if (c == "[") {
+            res[++rl] = "["; getchar()
+            # Remember where the bracket expression opened.
+            bstart = rl
+            if (c == "!") { res[++rl] = "^"; getchar() }
+            if (c == "]") { res[++rl] = "]"; getchar() }
+            # A leading literal '^' is moved to the end of the bracket
+            # expression so that the ERE does not read it as a negation.
+            if (c == "^") { circ = 1; getchar() } else circ = 0
+            # c is "" once we run off the end of the record; stop there so
+            # that an unterminated bracket expression cannot loop forever.
+            while (c != "]" && c != "") {
+                rest = substr($0, i)
+                if (match(rest, /^\[:[a-z]+:\]/) == 1) {
+                    # Copy a character class such as [:alpha:] verbatim and
+                    # continue with the character right after it.
+                    res[++rl] = substr(rest, 1, RLENGTH)
+                    i += RLENGTH; c = substr($0, i, 1)
+                } else {
+                    res[++rl] = c; getchar()
+                }
+            }
+            if (circ && rl == bstart) {
+                # The bracket expression holds nothing but '^'.  "[^]" would
+                # be an unterminated negation in an ERE, so replace the
+                # opening "[" with an escaped '^'.
+                res[rl] = "\\^"
+            } else
+                res[++rl] = circ ? "^]" : "]"
+        } else if (c == "\\") {
+            getchar()
+            escaped(c)
+        } else
+            escaped(c)
+    }
+    # A trailing '*' means the ERE is simply not anchored at the end.
+    if (res[rl] == ".*")
+        rl--
+    else
+        res[++rl] = "$"
+
+    j++
+    for (i = 1; i <= rl; i++)
+        all[j] = all[j] res[i]
+
+    # Reset the per-record state for the next glob.
+    i = 0; rl = 0; split("", res)
+}
+
+# Append the character c to res[], preceded by a backslash when c is special
+# in an ERE.
+# In an ERE, we can use a backslash to escape any character.
+# However, it is good to avoid generating longer EREs than are necessary.
+# We do escape forward slashes, for ease of use with awk.
+# We also escape ')', as not every awk accepts an unmatched ')' in an ERE.
+# In the bracket expression below, '.' is listed before '[' so that the pair
+# "[." cannot be taken for the start of a collating symbol.
+function escaped (c) { res[++rl] = (c ~ /[.[()*+?{|^$\/\\]/) ? "\\" c : c }
+
+# Print the EREs collected in all[1..j] as a single ERE, joined with '|' and
+# terminated by ORS.  Nothing is printed if there were no non-empty records.
+END {
+    if (j) {
+        # Always pass the data through "%s": the EREs may contain '%', which
+        # must not be interpreted as a printf conversion.
+        printf "%s", all[1]
+        for (i = 2; i <= j; i++)
+            printf "|%s", all[i]
+        printf "%s", ORS
+    }
+}