summaryrefslogtreecommitdiff
path: root/Git/Quote.hs
blob: 2ca442ecb6837b6b08be7e47dd200914572d970c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
{- Some git commands output quoted filenames, in a rather annoyingly complex
 - C-style encoding.
 -
 - Copyright 2010-2023 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE OverloadedStrings, TypeSynonymInstances #-}

module Git.Quote (
	unquote,
	quote,
	noquote,
	QuotePath(..),
	StringContainingQuotedPath(..),
	quotedPaths,
	prop_quote_unquote_roundtrip,
) where

import Common
import Utility.Format (decode_c, encode_c, encode_c', isUtf8Byte)
import Utility.QuickCheck
import Utility.SafeOutput

import Data.Char
import Data.Word
import Data.String
import qualified Data.ByteString as S
import qualified Data.Semigroup as Sem
import Prelude

-- Undoes git's quoting of a filename. Input that both starts and ends
-- with a double quote has those two bytes removed and the remainder run
-- through decode_c. Anything else (including the empty string and a lone
-- double quote) is returned unchanged.
unquote :: S.ByteString -> RawFilePath
unquote b = maybe b decode_c (S.uncons b >>= afterOpening >>= beforeClosing)
  where
	dquote :: Word8
	dquote = fromIntegral (ord '"')

	-- Continue only when the first byte is a double quote, splitting
	-- the final byte off of what follows it.
	afterOpening (first, rest)
		| first == dquote = S.unsnoc rest
		| otherwise = Nothing

	-- Yield the interior only when the final byte is a double quote.
	beforeClosing (interior, final)
		| final == dquote = Just interior
		| otherwise = Nothing

-- Unconditionally runs the path through encode_c and wraps the result in
-- double quotes, even for paths that git itself would leave unquoted.
quoteAlways :: RawFilePath -> S.ByteString
quoteAlways s = S.concat [dquote, encode_c needencode s, dquote]
  where
	dquote :: S.ByteString
	dquote = "\""

	-- Bytes that must be encoded: the double quote itself, and any byte
	-- for which isUtf8Byte holds.
	needencode c = c == fromIntegral (ord '"') || isUtf8Byte c

-- git config core.quotePath controls whether to quote unicode characters.
-- When the wrapped Bool is True, quote treats bytes matching isUtf8Byte
-- as needing encoding; when False, it does not.
newtype QuotePath = QuotePath Bool

-- Types whose values can be turned into a ByteString, either quoted the
-- way git would quote paths, or left unquoted.
class Quoteable t where
	-- double quotes and encodes when git would
	quote :: QuotePath -> t -> S.ByteString

	-- converts to a ByteString without applying any path quoting
	noquote :: t -> S.ByteString

instance Quoteable RawFilePath where
	-- When encode_c' returns Nothing, the path is returned as-is;
	-- otherwise the encoded form is wrapped in double quotes.
	quote (QuotePath qp) s = maybe s wrap (encode_c' needencode s)
	  where
		wrap encoded = "\"" <> encoded <> "\""

		-- A double quote always needs encoding. Bytes matching
		-- isUtf8Byte need it only when QuotePath is enabled.
		needencode c = c == fromIntegral (ord '"') || (qp && isUtf8Byte c)

	noquote = id

-- Allows building up a string that contains paths, which will get quoted.
-- With OverloadedStrings, string literals become UnquotedByteString
-- values, so they are never quoted as paths.
-- Eg: QuotedPath f <> ": not found"
data StringContainingQuotedPath
	-- text held as a String; not treated as a path
	= UnquotedString String 
	-- text held as a ByteString; not treated as a path
	| UnquotedByteString S.ByteString 
	-- a path, which quote will quote when necessary
	| QuotedPath RawFilePath
	-- concatenation of two pieces, rendered left then right
	| StringContainingQuotedPath :+: StringContainingQuotedPath
	deriving (Show, Eq)

-- Combines a list of paths into a single value in which each path is a
-- QuotedPath and consecutive paths are separated by a single space.
quotedPaths :: [RawFilePath] -> StringContainingQuotedPath
quotedPaths paths = case paths of
	[] -> mempty
	(p:rest) -> QuotedPath p <> separated rest
  where
	-- What follows a path: nothing after the last one, otherwise a
	-- space and then the remaining paths.
	separated [] = mempty
	separated ps = " " <> quotedPaths ps

instance Quoteable StringContainingQuotedPath where
	-- Unquoted pieces go through safeOutput, paths are quoted as
	-- RawFilePaths, and concatenations are rendered left to right.
	quote qp v = case v of
		UnquotedString s -> safeOutput (encodeBS s)
		UnquotedByteString s -> safeOutput s
		QuotedPath p -> quote qp p
		l :+: r -> quote qp l <> quote qp r

	-- Same traversal, but every piece is emitted as-is (Strings are
	-- only converted with encodeBS).
	noquote v = case v of
		UnquotedString s -> encodeBS s
		UnquotedByteString s -> s
		QuotedPath p -> p
		l :+: r -> noquote l <> noquote r

-- String literals (with OverloadedStrings) become unquoted text, never
-- paths.
instance IsString StringContainingQuotedPath where
	fromString s = UnquotedByteString (encodeBS s)

-- Two adjacent unquoted pieces of the same representation are merged
-- into one; every other combination is joined with :+:.
instance Sem.Semigroup StringContainingQuotedPath where
	l <> r = case (l, r) of
		(UnquotedString a, UnquotedString b) ->
			UnquotedString (a <> b)
		(UnquotedByteString a, UnquotedByteString b) ->
			UnquotedByteString (a <> b)
		_ -> l :+: r

-- The identity element is an empty unquoted ByteString.
instance Monoid StringContainingQuotedPath where
	mempty = UnquotedByteString S.empty

-- Checks that quoteAlways followed by unquote gives back the original
-- path.
--
-- This only holds when the string does not contain high unicode,
-- because eg, both "\12345" and "\227\128\185" are encoded to
-- "\343\200\271".
--
-- That is not a real-world problem, and TestableFilePath limits what's
-- tested to ascii, so the property avoids running into it.
prop_quote_unquote_roundtrip :: TestableFilePath -> Bool
prop_quote_unquote_roundtrip ts = roundtrip original == original
  where
	original = fromTestableFilePath ts
	roundtrip = fromRawFilePath . unquote . quoteAlways . toRawFilePath