1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
# Copyright (C) 2023 Sean Whitton <spwhitton@spwhitton.name>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Where each input record is a glob, output a single ERE matching the
# disjunction of all the non-empty input records.
# This is for matching, not expansion: '/' and '.' are not treated specially.
# There is no shell quotation removal, and we do not yet support collating
# symbols or equivalence classes within bracket expressions.
# There is no input validation.
# Split fields on the record separator, so that every record consists of
# exactly one field.
BEGIN {
    FS = RS
}
# Advance the global cursor i by one position and store the single
# character of $0 found at that position in the global c.
function getchar () {
    i++
    c = substr($0, i, 1)
}
# Translate one non-empty glob record into an ERE and append it to all[].
#
# State (all global, as awk has no block-local variables):
#   i        index in $0 of the character most recently read by getchar()
#   c        that character ("" once the record is exhausted)
#   res, rl  pieces of the ERE under construction, and how many there are
#   all, j   finished EREs, and how many there are (read by the END rule)
length($0) {
    res[++rl] = "^"
    while (i < length($0)) {
        getchar()
        if (c == "*") {
            # A leading '*' simply drops the '^' anchor.  We must check
            # that res[1] really is the anchor: once it has been dropped,
            # rl == 1 means one genuine piece has been emitted, and that
            # piece must not be thrown away (e.g. "*a*" or "*a*b").
            if (rl == 1 && res[1] == "^")
                rl = 0
            # With nothing emitted and no anchor, a further '*' adds
            # nothing; otherwise collapse runs of '*' into one ".*".
            else if (rl > 0 && res[rl] != ".*")
                res[++rl] = ".*"
        } else if (c == "?")
            res[++rl] = "."
        else if (c == "[") {
            # Remember where the bracket expression began so that it can
            # be undone if it turns out to be unterminated.
            open_i = i
            res[++rl] = "["; open_rl = rl
            getchar()
            if (c == "!") { res[++rl] = "^"; getchar() }
            # A ']' in first position is a literal member.
            if (c == "]") { res[++rl] = "]"; getchar() }
            circ = 0
            # Stop at the closing ']' or at the end of the record,
            # whichever comes first, so an unterminated '[' cannot hang.
            while (c != "]" && c != "") {
                if (c == "[" && match(substr($0, i), /^\[:[a-z]+:\]/)) {
                    # Copy a character class through verbatim, and leave
                    # the cursor on its final ']' so that the getchar()
                    # below reads the character following the class.
                    res[++rl] = substr($0, i, RLENGTH)
                    i += RLENGTH - 1
                } else if (c == "^" && rl == open_rl)
                    # A literal '^' directly after '[' would negate the
                    # ERE bracket expression; hold it back and emit it
                    # last instead.
                    circ = 1
                else
                    res[++rl] = c
                getchar()
            }
            if (c != "]") {
                # Unterminated: discard what was built, treat the '[' as
                # an ordinary character, and rescan what follows it.
                i = open_i; rl = open_rl - 1
                escaped("[")
            } else if (circ && rl == open_rl)
                # The bracket held nothing but '^'; "[^]" is not a valid
                # ERE, so emit an escaped caret in place of the bracket.
                res[rl] = "\\^"
            else
                res[++rl] = circ ? "^]" : "]"
        } else if (c == "\\") {
            # A backslash makes the next character literal.
            getchar()
            escaped(c)
        } else
            escaped(c)
    }
    # A trailing '*' means there is no need to anchor at the end.
    if (rl > 0 && res[rl] == ".*")
        rl--
    else
        res[++rl] = "$"
    j++
    for (i = 1; i <= rl; i++)
        all[j] = all[j] res[i]
    # Reset the per-record state for the next record.
    i = 0; rl = 0; split("", res)
}
# Append the character c to the ERE under construction, as a literal.
# Although a backslash may precede any character in an ERE, we add one
# only for the characters listed in the bracket expression below, so as
# to keep the generated ERE short.  The forward slash is on that list so
# that the output is convenient to use with awk.
function escaped (c) {
    if (c ~ /[[(.*+?{|^$\/\\]/)
        res[++rl] = "\\" c
    else
        res[++rl] = c
}
# Print the disjunction of all the per-record EREs collected in all[1..j]
# as a single output record.  Nothing is printed if there were none.
END {
    if (j) {
        # Always pass data as an argument and never as the format string:
        # a '%' in an ERE (or in ORS) would otherwise be interpreted as
        # the start of a conversion specification.
        printf "%s", all[1]
        for (i = 2; i <= j; i++)
            printf "|%s", all[i]
        printf "%s", ORS
    }
}
|