summaryrefslogtreecommitdiff
path: root/Utility/FileSystemEncoding.hs
diff options
context:
space:
mode:
Diffstat (limited to 'Utility/FileSystemEncoding.hs')
-rw-r--r--Utility/FileSystemEncoding.hs40
1 files changed, 37 insertions, 3 deletions
diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs
index fa4b39a..67341d3 100644
--- a/Utility/FileSystemEncoding.hs
+++ b/Utility/FileSystemEncoding.hs
@@ -1,19 +1,23 @@
{- GHC File system encoding handling.
-
- - Copyright 2012-2014 Joey Hess <joey@kitenet.net>
+ - Copyright 2012-2014 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
{-# LANGUAGE CPP #-}
+{-# OPTIONS_GHC -fno-warn-tabs #-}
module Utility.FileSystemEncoding (
fileEncoding,
withFilePath,
md5FilePath,
decodeBS,
+ encodeBS,
decodeW8,
encodeW8,
+ encodeW8NUL,
+ decodeW8NUL,
truncateFilePath,
) where
@@ -25,11 +29,15 @@ import System.IO.Unsafe
import qualified Data.Hash.MD5 as MD5
import Data.Word
import Data.Bits.Utils
+import Data.List
+import Data.List.Utils
import qualified Data.ByteString.Lazy as L
#ifdef mingw32_HOST_OS
import qualified Data.ByteString.Lazy.UTF8 as L8
#endif
+import Utility.Exception
+
{- Sets a Handle to use the filesystem encoding. This causes data
- written or read from it to be encoded/decoded the same
- as ghc 7.4 does to filenames etc. This special encoding
@@ -63,12 +71,16 @@ withFilePath fp f = Encoding.getFileSystemEncoding
- only allows doing this conversion with CStrings, and the CString buffer
- is allocated, used, and deallocated within the call, with no side
- effects.
+ -
+ - If the FilePath contains a value that is not legal in the filesystem
+ - encoding, rather than thowing an exception, it will be returned as-is.
-}
{-# NOINLINE _encodeFilePath #-}
_encodeFilePath :: FilePath -> String
_encodeFilePath fp = unsafePerformIO $ do
enc <- Encoding.getFileSystemEncoding
- GHC.withCString enc fp $ GHC.peekCString Encoding.char8
+ GHC.withCString enc fp (GHC.peekCString Encoding.char8)
+ `catchNonAsync` (\_ -> return fp)
{- Encodes a FilePath into a Md5.Str, applying the filesystem encoding. -}
md5FilePath :: FilePath -> MD5.Str
@@ -77,18 +89,29 @@ md5FilePath = MD5.Str . _encodeFilePath
{- Decodes a ByteString into a FilePath, applying the filesystem encoding. -}
decodeBS :: L.ByteString -> FilePath
#ifndef mingw32_HOST_OS
-decodeBS = encodeW8 . L.unpack
+decodeBS = encodeW8NUL . L.unpack
#else
{- On Windows, we assume that the ByteString is utf-8, since Windows
- only uses unicode for filenames. -}
decodeBS = L8.toString
#endif
+{- Encodes a FilePath into a ByteString, applying the filesystem encoding. -}
+encodeBS :: FilePath -> L.ByteString
+#ifndef mingw32_HOST_OS
+encodeBS = L.pack . decodeW8NUL
+#else
+encodeBS = L8.fromString
+#endif
+
{- Converts a [Word8] to a FilePath, encoding using the filesystem encoding.
-
- w82c produces a String, which may contain Chars that are invalid
- unicode. From there, this is really a simple matter of applying the
- file system encoding, only complicated by GHC's interface to doing so.
+ -
+ - Note that the encoding stops at any NUL in the input. FilePaths
+ - do not normally contain embedded NUL, but Haskell Strings may.
-}
{-# NOINLINE encodeW8 #-}
encodeW8 :: [Word8] -> FilePath
@@ -101,6 +124,17 @@ encodeW8 w8 = unsafePerformIO $ do
decodeW8 :: FilePath -> [Word8]
decodeW8 = s2w8 . _encodeFilePath
+{- Like encodeW8 and decodeW8, but NULs are passed through unchanged. -}
+encodeW8NUL :: [Word8] -> FilePath
+encodeW8NUL = intercalate nul . map encodeW8 . split (s2w8 nul)
+ where
+ nul = ['\NUL']
+
+decodeW8NUL :: FilePath -> [Word8]
+decodeW8NUL = intercalate (s2w8 nul) . map decodeW8 . split nul
+ where
+ nul = ['\NUL']
+
{- Truncates a FilePath to the given number of bytes (or less),
- as represented on disk.
-