summaryrefslogtreecommitdiff
path: root/Utility/Format.hs
diff options
context:
space:
mode:
authorJoey Hess <joeyh@joeyh.name>2023-08-14 12:06:32 -0400
committerJoey Hess <joeyh@joeyh.name>2023-08-14 12:12:52 -0400
commitedf83982be214f3c839fab9b659f645de53a9100 (patch)
treebef06cb750379c6d7942fc13b13fcb328201354c /Utility/Format.hs
parentf0cd3a2a3758ddcd2f0900c16bdc1fb80bbd6e92 (diff)
downloadgit-repair-edf83982be214f3c839fab9b659f645de53a9100.tar.gz
merge from git-annex
Support building with unix-compat 0.7
Diffstat (limited to 'Utility/Format.hs')
-rw-r--r--Utility/Format.hs149
1 files changed, 94 insertions, 55 deletions
diff --git a/Utility/Format.hs b/Utility/Format.hs
index 466988c..930b7ee 100644
--- a/Utility/Format.hs
+++ b/Utility/Format.hs
@@ -1,6 +1,6 @@
{- Formatted string handling.
-
- - Copyright 2010-2020 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2023 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
@@ -9,10 +9,12 @@ module Utility.Format (
Format,
gen,
format,
+ escapedFormat,
formatContainsVar,
decode_c,
encode_c,
encode_c',
+ isUtf8Byte,
prop_encode_c_decode_c_roundtrip
) where
@@ -21,12 +23,11 @@ import Data.Char (isAlphaNum, isOctDigit, isHexDigit, isSpace, chr, ord, isAscii
import Data.Maybe (fromMaybe)
import Data.Word (Word8)
import Data.List (isPrefixOf)
-import qualified Codec.Binary.UTF8.String
import qualified Data.Map as M
+import qualified Data.ByteString as S
import Utility.PartialPrelude
-
-type FormatString = String
+import Utility.FileSystemEncoding
{- A format consists of a list of fragments. -}
type Format = [Frag]
@@ -53,7 +54,8 @@ format f vars = concatMap expand f
where
expand (Const s) = s
expand (Var name j esc)
- | esc = justify j $ encode_c' isSpace $ getvar name
+ | esc = justify j $ decodeBS $ escapedFormat $
+ encodeBS $ getvar name
| otherwise = justify j $ getvar name
getvar name = fromMaybe "" $ M.lookup name vars
justify UnJustified s = s
@@ -62,6 +64,13 @@ format f vars = concatMap expand f
pad i s = take (i - length s) spaces
spaces = repeat ' '
+escapedFormat :: S.ByteString -> S.ByteString
+escapedFormat = encode_c needescape
+ where
+ needescape c = isUtf8Byte c ||
+ isSpace (chr (fromIntegral c)) ||
+ c == fromIntegral (ord '"')
+
{- Generates a Format that can be used to expand variables in a
- format string, such as "${foo} ${bar;10} ${baz;-10}\n"
-
@@ -69,8 +78,8 @@ format f vars = concatMap expand f
-
- Also, "${escaped_foo}" will apply encode_c to the value of variable foo.
-}
-gen :: FormatString -> Format
-gen = filter (not . empty) . fuse [] . scan [] . decode_c
+gen :: String -> Format
+gen = filter (not . empty) . fuse [] . scan [] . decodeBS . decode_c . encodeBS
where
-- The Format is built up in reverse, for efficiency,
-- and can have many adjacent Consts. Fusing it fixes both
@@ -122,33 +131,50 @@ formatContainsVar v = any go
{- Decodes a C-style encoding, where \n is a newline (etc),
- \NNN is an octal encoded character, and \xNN is a hex encoded character.
-}
-decode_c :: FormatString -> String
-decode_c [] = []
-decode_c s = unescape ("", s)
+decode_c :: S.ByteString -> S.ByteString
+decode_c s
+ | S.null s = S.empty
+ | otherwise = unescape (S.empty, s)
where
- e = '\\'
- unescape (b, []) = b
- -- look for escapes starting with '\'
- unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair)
+ e = fromIntegral (ord '\\')
+ x = fromIntegral (ord 'x')
+ isescape c = c == e
+ unescape (b, v)
+ | S.null v = b
+ | otherwise = b <> fst pair <> unescape (handle $ snd pair)
where
- pair = span (/= e) v
- isescape x = x == e
- handle (x:'x':n1:n2:rest)
- | isescape x && allhex = (fromhex, rest)
+ pair = S.span (not . isescape) v
+ handle b
+ | S.length b >= 1 && isescape (S.index b 0) = handle' b
+ | otherwise = (S.empty, b)
+
+ handle' b
+ | S.length b >= 4
+ && S.index b 1 == x
+ && allhex = (fromhex, rest)
where
+ n1 = chr (fromIntegral (S.index b 2))
+ n2 = chr (fromIntegral (S.index b 3))
+ rest = S.drop 4 b
allhex = isHexDigit n1 && isHexDigit n2
- fromhex = [chr $ readhex [n1, n2]]
+ fromhex = encodeBS [chr $ readhex [n1, n2]]
readhex h = Prelude.read $ "0x" ++ h :: Int
- handle (x:n1:n2:n3:rest)
- | isescape x && alloctal = (fromoctal, rest)
+ handle' b
+ | S.length b >= 4 && alloctal = (fromoctal, rest)
where
+ n1 = chr (fromIntegral (S.index b 1))
+ n2 = chr (fromIntegral (S.index b 2))
+ n3 = chr (fromIntegral (S.index b 3))
+ rest = S.drop 4 b
alloctal = isOctDigit n1 && isOctDigit n2 && isOctDigit n3
- fromoctal = [chr $ readoctal [n1, n2, n3]]
+ fromoctal = encodeBS [chr $ readoctal [n1, n2, n3]]
readoctal o = Prelude.read $ "0o" ++ o :: Int
- -- \C is used for a few special characters
- handle (x:nc:rest)
- | isescape x = ([echar nc], rest)
+ handle' b
+ | S.length b >= 2 =
+ (S.singleton (fromIntegral (ord (echar nc))), rest)
where
+ nc = chr (fromIntegral (S.index b 1))
+ rest = S.drop 2 b
echar 'a' = '\a'
echar 'b' = '\b'
echar 'f' = '\f'
@@ -156,38 +182,50 @@ decode_c s = unescape ("", s)
echar 'r' = '\r'
echar 't' = '\t'
echar 'v' = '\v'
- echar a = a
- handle n = ("", n)
-
-{- Inverse of decode_c. -}
-encode_c :: String -> FormatString
-encode_c = encode_c' (const False)
+ echar a = a -- \\ decodes to '\', and \" to '"'
+ handle' b = (S.empty, b)
-{- Encodes special characters, as well as any matching the predicate. -}
-encode_c' :: (Char -> Bool) -> String -> FormatString
-encode_c' p = concatMap echar
+{- Inverse of decode_c. Encodes ascii control characters as well as
+ - bytes that match the predicate. (And also '\' itself.)
+ -}
+encode_c :: (Word8 -> Bool) -> S.ByteString -> S.ByteString
+encode_c p s = fromMaybe s (encode_c' p s)
+
+{- Returns Nothing when nothing needs to be escaped in the input ByteString. -}
+encode_c' :: (Word8 -> Bool) -> S.ByteString -> Maybe S.ByteString
+encode_c' p s
+ | S.any needencode s = Just (S.concatMap echar s)
+ | otherwise = Nothing
where
- e c = '\\' : [c]
- echar '\a' = e 'a'
- echar '\b' = e 'b'
- echar '\f' = e 'f'
- echar '\n' = e 'n'
- echar '\r' = e 'r'
- echar '\t' = e 't'
- echar '\v' = e 'v'
- echar '\\' = e '\\'
- echar '"' = e '"'
+ e = fromIntegral (ord '\\')
+ q = fromIntegral (ord '"')
+ del = 0x7F
+ iscontrol c = c < 0x20
+
+ echar 0x7 = ec 'a'
+ echar 0x8 = ec 'b'
+ echar 0x0C = ec 'f'
+ echar 0x0A = ec 'n'
+ echar 0x0D = ec 'r'
+ echar 0x09 = ec 't'
+ echar 0x0B = ec 'v'
echar c
- | ord c < 0x20 = e_asc c -- low ascii
- | ord c >= 256 = e_utf c -- unicode
- | ord c > 0x7E = e_asc c -- high ascii
- | p c = e_asc c
- | otherwise = [c]
- -- unicode character is decomposed to individual Word8s,
- -- and each is shown in octal
- e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8])
- e_asc c = showoctal $ ord c
- showoctal i = '\\' : printf "%03o" i
+ | iscontrol c = showoctal c -- other control characters
+ | c == e = ec '\\' -- escape the escape character itself
+ | c == del = showoctal c
+ | p c = if c == q
+ then ec '"' -- escape double quote
+ else showoctal c
+ | otherwise = S.singleton c
+
+ needencode c = iscontrol c || c == e || c == del || p c
+
+ ec c = S.pack [e, fromIntegral (ord c)]
+
+ showoctal i = encodeBS ('\\' : printf "%03o" i)
+
+isUtf8Byte :: Word8 -> Bool
+isUtf8Byte c = c >= 0x80
{- For quickcheck.
-
@@ -198,6 +236,7 @@ encode_c' p = concatMap echar
- This property papers over the problem, by only testing ascii.
-}
prop_encode_c_decode_c_roundtrip :: String -> Bool
-prop_encode_c_decode_c_roundtrip s = s' == decode_c (encode_c s')
+prop_encode_c_decode_c_roundtrip s = s' ==
+ decodeBS (decode_c (encode_c isUtf8Byte (encodeBS s')))
where
s' = filter isAscii s