# Copyright (C) 2023 Sean Whitton
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see https://www.gnu.org/licenses/.

# Where each input record is a glob, output a single ERE matching the
# disjunction of all the non-empty input records.
# This is for matching, not expansion: '/' and '.' are not treated specially.
# There is no shell quotation removal, and we do not yet support collating
# symbols or equivalence classes within bracket expressions.
# There is no input validation; the only concession is that a bracket
# expression missing its closing ']' is closed at the end of the glob.

# Shared state used while translating one glob:
#   glob  the glob being translated
#   i     index into glob of the character most recently read
#   c     that character, or "" once past the end of glob
#   res   the pieces of the ERE built so far, res[1] .. res[rl]
#   rl    number of pieces in res
# State used across records:
#   all   the finished EREs, all[1] .. all[j]
#   j     number of non-empty records translated

# One field per record.
BEGIN { FS = RS }

# Read the next character of glob into c, advancing i.
function getchar () { c = substr(glob, ++i, 1) }

# Translate the glob g into an ERE and return it.
# The ERE is anchored with '^' and '$', except that a leading or trailing
# run of '*' is expressed by leaving the corresponding anchor off.
# The result is never empty: a glob made only of '*' becomes "$".
function glob2ere (g,    ere, k) {
    glob = g; i = 0; rl = 0; split("", res)
    res[++rl] = "^"
    while (i < length(glob)) {
        getchar()
        if (c == "*") {
            # A star at the very start just removes the '^' anchor.
            # We must check that res[1] really is the anchor: once it has
            # been removed, rl == 1 means a single real piece is present.
            if (rl == 1 && res[1] == "^")
                rl--
            # Collapse runs of stars, including further leading stars.
            else if (rl > 0 && res[rl] != ".*")
                res[++rl] = ".*"
        } else if (c == "?")
            res[++rl] = "."
        else if (c == "[") {
            res[++rl] = "["; getchar()
            if (c == "!") { res[++rl] = "^"; getchar() }
            # A ']' in first position is a literal member of the set.
            if (c == "]") { res[++rl] = "]"; getchar() }
            # A literal '^' in first position would negate the ERE bracket
            # expression, so it is moved to the end of the set instead.
            if (c == "^") {
                circ = 1; getchar()
            } else
                circ = 0
            # c == "" means we ran off the end without finding ']'.
            while (c != "]" && c != "") {
                rest = substr(glob, i)
                if (match(rest, /^\[:[a-z]+:\]/) == 1) {
                    # Copy a character class such as [:alpha:] verbatim
                    # and step to the character right after it.
                    res[++rl] = substr(rest, 1, RLENGTH)
                    i += RLENGTH; c = substr(glob, i, 1)
                } else {
                    res[++rl] = c; getchar()
                }
            }
            res[++rl] = circ ? "^]" : "]"
        } else if (c == "\\") {
            # A backslash quotes the following character.
            getchar()
            escaped(c)
        } else
            escaped(c)
    }
    # A trailing star just means there is no '$' anchor.
    if (rl > 0 && res[rl] == ".*")
        rl--
    else
        res[++rl] = "$"
    ere = ""
    for (k = 1; k <= rl; k++)
        ere = ere res[k]
    return ere
}

# Translate each non-empty record and remember the result.
length { all[++j] = glob2ere($0) }

# In an ERE, we can use a backslash to escape any character.
# However, it is good to avoid generating longer EREs than are necessary.
# We do escape forward slashes, for ease of use with awk.
# Appends c to res, preceded by a backslash if c needs one.
function escaped (c) {
    res[++rl] = (c ~ /[[(.*+?{|^$\/\\]/) ? "\\" c : c
}

# Print the disjunction of all translated globs, if there were any.
END {
    if (j) {
        # The EREs are data, not format strings: they may contain '%'.
        printf "%s", all[1]
        for (i = 2; i <= j; i++)
            printf "|%s", all[i]
        printf "%s", ORS
    }
}