summaryrefslogtreecommitdiff
path: root/Utility/FileSystemEncoding.hs
diff options
context:
space:
mode:
authorJoey Hess <joeyh@joeyh.name>2020-01-02 12:34:10 -0400
committerJoey Hess <joeyh@joeyh.name>2020-01-02 12:42:57 -0400
commit9df8a6eb9405dde4464d27133c04f5ee539a85de (patch)
tree8a7ac5f52be8679f8a2525515a0b2c1b715c99ad /Utility/FileSystemEncoding.hs
parent16022a8b98f4bc134542e78a42538364d2f97d92 (diff)
downloadgit-repair-9df8a6eb9405dde4464d27133c04f5ee539a85de.tar.gz
merge from git-annex and relicense accordingly
Merge git library and utility from git-annex. The former is now relicensed AGPL, so git-repair as a whole becomes AGPL. For simplicity, I am relicensing the remainder of the code in git-repair AGPL as well, per the header changes in this commit. While that code is also technically available under the GPL license, as it's been released under that license before, changes going forward will be only released by me under the AGPL.
Diffstat (limited to 'Utility/FileSystemEncoding.hs')
-rw-r--r--Utility/FileSystemEncoding.hs93
1 files changed, 79 insertions, 14 deletions
diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs
index 444dc4a..f9e9814 100644
--- a/Utility/FileSystemEncoding.hs
+++ b/Utility/FileSystemEncoding.hs
@@ -12,12 +12,17 @@ module Utility.FileSystemEncoding (
useFileSystemEncoding,
fileEncoding,
withFilePath,
+ RawFilePath,
+ fromRawFilePath,
+ toRawFilePath,
+ decodeBL,
+ encodeBL,
decodeBS,
encodeBS,
- decodeW8,
- encodeW8,
- encodeW8NUL,
- decodeW8NUL,
+ decodeBL',
+ encodeBL',
+ decodeBS',
+ encodeBS',
truncateFilePath,
s2w8,
w82s,
@@ -32,8 +37,10 @@ import System.IO
import System.IO.Unsafe
import Data.Word
import Data.List
+import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
#ifdef mingw32_HOST_OS
+import qualified Data.ByteString.UTF8 as S8
import qualified Data.ByteString.Lazy.UTF8 as L8
#endif
@@ -103,31 +110,91 @@ _encodeFilePath fp = unsafePerformIO $ do
`catchNonAsync` (\_ -> return fp)
{- Decodes a ByteString into a FilePath, applying the filesystem encoding. -}
-decodeBS :: L.ByteString -> FilePath
+decodeBL :: L.ByteString -> FilePath
#ifndef mingw32_HOST_OS
-decodeBS = encodeW8NUL . L.unpack
+decodeBL = encodeW8NUL . L.unpack
#else
{- On Windows, we assume that the ByteString is utf-8, since Windows
- only uses unicode for filenames. -}
-decodeBS = L8.toString
+decodeBL = L8.toString
#endif
{- Encodes a FilePath into a ByteString, applying the filesystem encoding. -}
-encodeBS :: FilePath -> L.ByteString
+encodeBL :: FilePath -> L.ByteString
#ifndef mingw32_HOST_OS
-encodeBS = L.pack . decodeW8NUL
+encodeBL = L.pack . decodeW8NUL
#else
-encodeBS = L8.fromString
+encodeBL = L8.fromString
#endif
+decodeBS :: S.ByteString -> FilePath
+#ifndef mingw32_HOST_OS
+decodeBS = encodeW8NUL . S.unpack
+#else
+decodeBS = S8.toString
+#endif
+
+encodeBS :: FilePath -> S.ByteString
+#ifndef mingw32_HOST_OS
+encodeBS = S.pack . decodeW8NUL
+#else
+encodeBS = S8.fromString
+#endif
+
+{- Faster version that assumes the string does not contain NUL;
+ - if it does it will be truncated before the NUL. -}
+decodeBS' :: S.ByteString -> FilePath
+#ifndef mingw32_HOST_OS
+decodeBS' = encodeW8 . S.unpack
+#else
+decodeBS' = S8.toString
+#endif
+
+encodeBS' :: FilePath -> S.ByteString
+#ifndef mingw32_HOST_OS
+encodeBS' = S.pack . decodeW8
+#else
+encodeBS' = S8.fromString
+#endif
+
+decodeBL' :: L.ByteString -> FilePath
+#ifndef mingw32_HOST_OS
+decodeBL' = encodeW8 . L.unpack
+#else
+decodeBL' = L8.toString
+#endif
+
+encodeBL' :: FilePath -> L.ByteString
+#ifndef mingw32_HOST_OS
+encodeBL' = L.pack . decodeW8
+#else
+encodeBL' = L8.fromString
+#endif
+
+{- Recent versions of the unix package have this alias; defined here
+ - for backwards compatibility. -}
+type RawFilePath = S.ByteString
+
+{- Note that the RawFilePath is assumed to never contain NUL,
+ - since filename's don't. This should only be used with actual
+ - RawFilePaths not arbitrary ByteString that may contain NUL. -}
+fromRawFilePath :: RawFilePath -> FilePath
+fromRawFilePath = decodeBS'
+
+{- Note that the FilePath is assumed to never contain NUL,
+ - since filename's don't. This should only be used with actual FilePaths
+ - not arbitrary String that may contain NUL. -}
+toRawFilePath :: FilePath -> RawFilePath
+toRawFilePath = encodeBS'
+
{- Converts a [Word8] to a FilePath, encoding using the filesystem encoding.
-
- - w82c produces a String, which may contain Chars that are invalid
+ - w82s produces a String, which may contain Chars that are invalid
- unicode. From there, this is really a simple matter of applying the
- file system encoding, only complicated by GHC's interface to doing so.
-
- Note that the encoding stops at any NUL in the input. FilePaths
- - do not normally contain embedded NUL, but Haskell Strings may.
+ - cannot contain embedded NUL, but Haskell Strings may.
-}
{-# NOINLINE encodeW8 #-}
encodeW8 :: [Word8] -> FilePath
@@ -135,8 +202,6 @@ encodeW8 w8 = unsafePerformIO $ do
enc <- Encoding.getFileSystemEncoding
GHC.withCString Encoding.char8 (w82s w8) $ GHC.peekCString enc
-{- Useful when you want the actual number of bytes that will be used to
- - represent the FilePath on disk. -}
decodeW8 :: FilePath -> [Word8]
decodeW8 = s2w8 . _encodeFilePath