diff options
Diffstat (limited to 'Utility/FileSystemEncoding.hs')
-rw-r--r-- | Utility/FileSystemEncoding.hs | 40 |
1 files changed, 37 insertions, 3 deletions
diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs index fa4b39a..67341d3 100644 --- a/Utility/FileSystemEncoding.hs +++ b/Utility/FileSystemEncoding.hs @@ -1,19 +1,23 @@ {- GHC File system encoding handling. - - - Copyright 2012-2014 Joey Hess <joey@kitenet.net> + - Copyright 2012-2014 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} {-# LANGUAGE CPP #-} +{-# OPTIONS_GHC -fno-warn-tabs #-} module Utility.FileSystemEncoding ( fileEncoding, withFilePath, md5FilePath, decodeBS, + encodeBS, decodeW8, encodeW8, + encodeW8NUL, + decodeW8NUL, truncateFilePath, ) where @@ -25,11 +29,15 @@ import System.IO.Unsafe import qualified Data.Hash.MD5 as MD5 import Data.Word import Data.Bits.Utils +import Data.List +import Data.List.Utils import qualified Data.ByteString.Lazy as L #ifdef mingw32_HOST_OS import qualified Data.ByteString.Lazy.UTF8 as L8 #endif +import Utility.Exception + {- Sets a Handle to use the filesystem encoding. This causes data - written or read from it to be encoded/decoded the same - as ghc 7.4 does to filenames etc. This special encoding @@ -63,12 +71,16 @@ withFilePath fp f = Encoding.getFileSystemEncoding - only allows doing this conversion with CStrings, and the CString buffer - is allocated, used, and deallocated within the call, with no side - effects. + - + - If the FilePath contains a value that is not legal in the filesystem + - encoding, rather than thowing an exception, it will be returned as-is. -} {-# NOINLINE _encodeFilePath #-} _encodeFilePath :: FilePath -> String _encodeFilePath fp = unsafePerformIO $ do enc <- Encoding.getFileSystemEncoding - GHC.withCString enc fp $ GHC.peekCString Encoding.char8 + GHC.withCString enc fp (GHC.peekCString Encoding.char8) + `catchNonAsync` (\_ -> return fp) {- Encodes a FilePath into a Md5.Str, applying the filesystem encoding. -} md5FilePath :: FilePath -> MD5.Str @@ -77,18 +89,29 @@ md5FilePath = MD5.Str . _encodeFilePath {- Decodes a ByteString into a FilePath, applying the filesystem encoding. -} decodeBS :: L.ByteString -> FilePath #ifndef mingw32_HOST_OS -decodeBS = encodeW8 . L.unpack +decodeBS = encodeW8NUL . L.unpack #else {- On Windows, we assume that the ByteString is utf-8, since Windows - only uses unicode for filenames. -} decodeBS = L8.toString #endif +{- Encodes a FilePath into a ByteString, applying the filesystem encoding. -} +encodeBS :: FilePath -> L.ByteString +#ifndef mingw32_HOST_OS +encodeBS = L.pack . decodeW8NUL +#else +encodeBS = L8.fromString +#endif + {- Converts a [Word8] to a FilePath, encoding using the filesystem encoding. - - w82c produces a String, which may contain Chars that are invalid - unicode. From there, this is really a simple matter of applying the - file system encoding, only complicated by GHC's interface to doing so. + - + - Note that the encoding stops at any NUL in the input. FilePaths + - do not normally contain embedded NUL, but Haskell Strings may. -} {-# NOINLINE encodeW8 #-} encodeW8 :: [Word8] -> FilePath @@ -101,6 +124,17 @@ encodeW8 w8 = unsafePerformIO $ do decodeW8 :: FilePath -> [Word8] decodeW8 = s2w8 . _encodeFilePath +{- Like encodeW8 and decodeW8, but NULs are passed through unchanged. -} +encodeW8NUL :: [Word8] -> FilePath +encodeW8NUL = intercalate nul . map encodeW8 . split (s2w8 nul) + where + nul = ['\NUL'] + +decodeW8NUL :: FilePath -> [Word8] +decodeW8NUL = intercalate (s2w8 nul) . map decodeW8 . split nul + where + nul = ['\NUL'] + {- Truncates a FilePath to the given number of bytes (or less), - as represented on disk. - |