diff options
Diffstat (limited to 'Utility/FileSystemEncoding.hs')
-rw-r--r-- | Utility/FileSystemEncoding.hs | 93 |
1 files changed, 79 insertions, 14 deletions
diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs index 444dc4a..f9e9814 100644 --- a/Utility/FileSystemEncoding.hs +++ b/Utility/FileSystemEncoding.hs @@ -12,12 +12,17 @@ module Utility.FileSystemEncoding ( useFileSystemEncoding, fileEncoding, withFilePath, + RawFilePath, + fromRawFilePath, + toRawFilePath, + decodeBL, + encodeBL, decodeBS, encodeBS, - decodeW8, - encodeW8, - encodeW8NUL, - decodeW8NUL, + decodeBL', + encodeBL', + decodeBS', + encodeBS', truncateFilePath, s2w8, w82s, @@ -32,8 +37,10 @@ import System.IO import System.IO.Unsafe import Data.Word import Data.List +import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L #ifdef mingw32_HOST_OS +import qualified Data.ByteString.UTF8 as S8 import qualified Data.ByteString.Lazy.UTF8 as L8 #endif @@ -103,31 +110,91 @@ _encodeFilePath fp = unsafePerformIO $ do `catchNonAsync` (\_ -> return fp) {- Decodes a ByteString into a FilePath, applying the filesystem encoding. -} -decodeBS :: L.ByteString -> FilePath +decodeBL :: L.ByteString -> FilePath #ifndef mingw32_HOST_OS -decodeBS = encodeW8NUL . L.unpack +decodeBL = encodeW8NUL . L.unpack #else {- On Windows, we assume that the ByteString is utf-8, since Windows - only uses unicode for filenames. -} -decodeBS = L8.toString +decodeBL = L8.toString #endif {- Encodes a FilePath into a ByteString, applying the filesystem encoding. -} -encodeBS :: FilePath -> L.ByteString +encodeBL :: FilePath -> L.ByteString #ifndef mingw32_HOST_OS -encodeBS = L.pack . decodeW8NUL +encodeBL = L.pack . decodeW8NUL #else -encodeBS = L8.fromString +encodeBL = L8.fromString #endif +decodeBS :: S.ByteString -> FilePath +#ifndef mingw32_HOST_OS +decodeBS = encodeW8NUL . S.unpack +#else +decodeBS = S8.toString +#endif + +encodeBS :: FilePath -> S.ByteString +#ifndef mingw32_HOST_OS +encodeBS = S.pack . decodeW8NUL +#else +encodeBS = S8.fromString +#endif + +{- Faster version that assumes the string does not contain NUL; + - if it does it will be truncated before the NUL. -} +decodeBS' :: S.ByteString -> FilePath +#ifndef mingw32_HOST_OS +decodeBS' = encodeW8 . S.unpack +#else +decodeBS' = S8.toString +#endif + +encodeBS' :: FilePath -> S.ByteString +#ifndef mingw32_HOST_OS +encodeBS' = S.pack . decodeW8 +#else +encodeBS' = S8.fromString +#endif + +decodeBL' :: L.ByteString -> FilePath +#ifndef mingw32_HOST_OS +decodeBL' = encodeW8 . L.unpack +#else +decodeBL' = L8.toString +#endif + +encodeBL' :: FilePath -> L.ByteString +#ifndef mingw32_HOST_OS +encodeBL' = L.pack . decodeW8 +#else +encodeBL' = L8.fromString +#endif + +{- Recent versions of the unix package have this alias; defined here + - for backwards compatibility. -} +type RawFilePath = S.ByteString + +{- Note that the RawFilePath is assumed to never contain NUL, + - since filename's don't. This should only be used with actual + - RawFilePaths not arbitrary ByteString that may contain NUL. -} +fromRawFilePath :: RawFilePath -> FilePath +fromRawFilePath = decodeBS' + +{- Note that the FilePath is assumed to never contain NUL, + - since filename's don't. This should only be used with actual FilePaths + - not arbitrary String that may contain NUL. -} +toRawFilePath :: FilePath -> RawFilePath +toRawFilePath = encodeBS' + {- Converts a [Word8] to a FilePath, encoding using the filesystem encoding. - - - w82c produces a String, which may contain Chars that are invalid + - w82s produces a String, which may contain Chars that are invalid - unicode. From there, this is really a simple matter of applying the - file system encoding, only complicated by GHC's interface to doing so. - - Note that the encoding stops at any NUL in the input. FilePaths - - do not normally contain embedded NUL, but Haskell Strings may. + - cannot contain embedded NUL, but Haskell Strings may. -} {-# NOINLINE encodeW8 #-} encodeW8 :: [Word8] -> FilePath @@ -135,8 +202,6 @@ encodeW8 w8 = unsafePerformIO $ do enc <- Encoding.getFileSystemEncoding GHC.withCString Encoding.char8 (w82s w8) $ GHC.peekCString enc -{- Useful when you want the actual number of bytes that will be used to - - represent the FilePath on disk. -} decodeW8 :: FilePath -> [Word8] decodeW8 = s2w8 . _encodeFilePath |