diff options
author | Joey Hess <joeyh@joeyh.name> | 2023-08-14 12:06:32 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2023-08-14 12:12:52 -0400 |
commit | edf83982be214f3c839fab9b659f645de53a9100 (patch) | |
tree | bef06cb750379c6d7942fc13b13fcb328201354c /Git/Quote.hs | |
parent | f0cd3a2a3758ddcd2f0900c16bdc1fb80bbd6e92 (diff) | |
download | git-repair-edf83982be214f3c839fab9b659f645de53a9100.tar.gz |
merge from git-annex
Support building with unix-compat 0.7
Diffstat (limited to 'Git/Quote.hs')
-rw-r--r-- | Git/Quote.hs | 122 |
1 files changed, 122 insertions, 0 deletions
diff --git a/Git/Quote.hs b/Git/Quote.hs new file mode 100644 index 0000000..2ca442e --- /dev/null +++ b/Git/Quote.hs @@ -0,0 +1,122 @@ +{- Some git commands output quoted filenames, in a rather annoyingly complex + - C-style encoding. + - + - Copyright 2010-2023 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings, TypeSynonymInstances #-} + +module Git.Quote ( + unquote, + quote, + noquote, + QuotePath(..), + StringContainingQuotedPath(..), + quotedPaths, + prop_quote_unquote_roundtrip, +) where + +import Common +import Utility.Format (decode_c, encode_c, encode_c', isUtf8Byte) +import Utility.QuickCheck +import Utility.SafeOutput + +import Data.Char +import Data.Word +import Data.String +import qualified Data.ByteString as S +import qualified Data.Semigroup as Sem +import Prelude + +unquote :: S.ByteString -> RawFilePath +unquote b = case S.uncons b of + Nothing -> b + Just (h, t) + | h /= q -> b + | otherwise -> case S.unsnoc t of + Nothing -> b + Just (i, l) + | l /= q -> b + | otherwise -> decode_c i + where + q :: Word8 + q = fromIntegral (ord '"') + +-- always encodes and double quotes, even in cases that git does not +quoteAlways :: RawFilePath -> S.ByteString +quoteAlways s = "\"" <> encode_c needencode s <> "\"" + where + needencode c = isUtf8Byte c || c == fromIntegral (ord '"') + +-- git config core.quotePath controls whether to quote unicode characters +newtype QuotePath = QuotePath Bool + +class Quoteable t where + -- double quotes and encodes when git would + quote :: QuotePath -> t -> S.ByteString + + noquote :: t -> S.ByteString + +instance Quoteable RawFilePath where + quote (QuotePath qp) s = case encode_c' needencode s of + Nothing -> s + Just s' -> "\"" <> s' <> "\"" + where + needencode c + | c == fromIntegral (ord '"') = True + | qp = isUtf8Byte c + | otherwise = False + + noquote = id + +-- Allows building up a string that contains paths, which will get quoted. +-- With OverloadedStrings, strings are passed through without quoting. +-- Eg: QuotedPath f <> ": not found" +data StringContainingQuotedPath + = UnquotedString String + | UnquotedByteString S.ByteString + | QuotedPath RawFilePath + | StringContainingQuotedPath :+: StringContainingQuotedPath + deriving (Show, Eq) + +quotedPaths :: [RawFilePath] -> StringContainingQuotedPath +quotedPaths [] = mempty +quotedPaths (p:ps) = QuotedPath p <> if null ps + then mempty + else " " <> quotedPaths ps + +instance Quoteable StringContainingQuotedPath where + quote _ (UnquotedString s) = safeOutput (encodeBS s) + quote _ (UnquotedByteString s) = safeOutput s + quote qp (QuotedPath p) = quote qp p + quote qp (a :+: b) = quote qp a <> quote qp b + + noquote (UnquotedString s) = encodeBS s + noquote (UnquotedByteString s) = s + noquote (QuotedPath p) = p + noquote (a :+: b) = noquote a <> noquote b + +instance IsString StringContainingQuotedPath where + fromString = UnquotedByteString . encodeBS + +instance Sem.Semigroup StringContainingQuotedPath where + UnquotedString a <> UnquotedString b = UnquotedString (a <> b) + UnquotedByteString a <> UnquotedByteString b = UnquotedByteString (a <> b) + a <> b = a :+: b + +instance Monoid StringContainingQuotedPath where + mempty = UnquotedByteString mempty + +-- Encoding and then decoding roundtrips only when the string does not +-- contain high unicode, because eg, both "\12345" and "\227\128\185" +-- are encoded to "\343\200\271". +-- +-- That is not a real-world problem, and using TestableFilePath +-- limits what's tested to ascii, so avoids running into it. +prop_quote_unquote_roundtrip :: TestableFilePath -> Bool +prop_quote_unquote_roundtrip ts = + s == fromRawFilePath (unquote (quoteAlways (toRawFilePath s))) + where + s = fromTestableFilePath ts |