1 files changed, 40 insertions, 13 deletions
diff --git a/Git/Filename.hs b/Git/Filename.hs
index ee84d48..010e5ba 100644
--- a/Git/Filename.hs
+++ b/Git/Filename.hs
@@ -3,26 +3,53 @@
  -
  - Copyright 2010, 2011 Joey Hess <id@joeyh.name>
  -
- - Licensed under the GNU GPL version 3 or higher.
+ - Licensed under the GNU AGPL version 3 or higher.
  -}
 
 module Git.Filename where
 
+import Common
 import Utility.Format (decode_c, encode_c)
 
-import Common
+import Data.Char
+import Data.Word
+import qualified Data.ByteString as S
 
-decode :: String -> FilePath
-decode [] = []
-decode f@(c:s)
-	-- encoded strings will be inside double quotes
-	| c == '"' && end s == ['"'] = decode_c $ beginning s
-	| otherwise = f
+-- encoded filenames will be inside double quotes
+decode :: S.ByteString -> RawFilePath
+decode b = case S.uncons b of
+	Nothing -> b
+	Just (h, t)
+		| h /= q -> b
+		| otherwise -> case S.unsnoc t of
+			Nothing -> b
+			Just (i, l)
+				| l /= q -> b
+				| otherwise ->
+					encodeBS $ decode_c $ decodeBS i
+  where
+  	q :: Word8
+	q = fromIntegral (ord '"')
 
 {- Should not need to use this, except for testing decode. -}
-encode :: FilePath -> String
-encode s = "\"" ++ encode_c s ++ "\""
+encode :: RawFilePath -> S.ByteString
+encode s = encodeBS $ "\"" ++ encode_c (decodeBS s) ++ "\""
 
-{- for quickcheck -}
-prop_isomorphic_deencode :: String -> Bool
-prop_isomorphic_deencode s = s == decode (encode s)
+prop_encode_decode_roundtrip :: FilePath -> Bool
+prop_encode_decode_roundtrip s = s' ==
+	fromRawFilePath (decode (encode (toRawFilePath s')))
+  where
+	s' = nonul (nohigh s)
+	-- Encoding and then decoding roundtrips only when
+	-- the string does not contain high unicode, because eg, 
+	-- both "\12345" and "\227\128\185" are encoded to
+	-- "\343\200\271".
+	--
+	-- This property papers over the problem, by only
+	-- testing ascii
+	nohigh = filter isAscii
+	-- A String can contain a NUL, but toRawFilePath
+	-- truncates on the NUL, which is generally fine
+	-- because unix filenames cannot contain NUL.
+	-- So the encoding only roundtrips when there is no nul.
+	nonul = filter (/= '\NUL')