diff options
author | Joey Hess <joeyh@debian.org> | 2013-11-22 11:16:03 -0400 |
---|---|---|
committer | Joey Hess <joeyh@debian.org> | 2013-11-22 11:16:03 -0400 |
commit | 7e592e1d6ed5e0b25b37215da7558c6324688d6f (patch) | |
tree | 75a86ff02e9311bcff817f2dcfe9b0a6ca1b5708 /Utility/FileSystemEncoding.hs | |
download | git-repair-7e592e1d6ed5e0b25b37215da7558c6324688d6f.tar.gz |
git-repair (1.20131122) unstable; urgency=low
* Added test mode, which can be used to randomly corrupt test
repositories, in reproducible ways, which allows easy
corruption-driven-development.
* Improve repair code in the case where the index file is corrupt,
and this hides other problems.
* Write a dummy .git/HEAD if the file is missing or corrupt, as
git otherwise will not treat the repository as a git repo.
* Improve fsck code to find badly corrupted objects that crash git fsck
before it can complain about them.
* Fixed crashes on bad file encodings.
* Can now run 10000 tests (git-repair --test -n 10000 --force)
with 0 failures.
# imported from the archive
Diffstat (limited to 'Utility/FileSystemEncoding.hs')
-rw-r--r-- | Utility/FileSystemEncoding.hs | 93 |
1 files changed, 93 insertions, 0 deletions
diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs new file mode 100644 index 0000000..ac105e7 --- /dev/null +++ b/Utility/FileSystemEncoding.hs @@ -0,0 +1,93 @@ +{- GHC File system encoding handling. + - + - Copyright 2012-2013 Joey Hess <joey@kitenet.net> + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.FileSystemEncoding ( + fileEncoding, + withFilePath, + md5FilePath, + decodeW8, + encodeW8, + truncateFilePath, +) where + +import qualified GHC.Foreign as GHC +import qualified GHC.IO.Encoding as Encoding +import Foreign.C +import System.IO +import System.IO.Unsafe +import qualified Data.Hash.MD5 as MD5 +import Data.Word +import Data.Bits.Utils + +{- Sets a Handle to use the filesystem encoding. This causes data + - written or read from it to be encoded/decoded the same + - as ghc 7.4 does to filenames etc. This special encoding + - allows "arbitrary undecodable bytes to be round-tripped through it". -} +fileEncoding :: Handle -> IO () +fileEncoding h = hSetEncoding h =<< Encoding.getFileSystemEncoding + +{- Marshal a Haskell FilePath into a NUL terminated C string using temporary + - storage. The FilePath is encoded using the filesystem encoding, + - reversing the decoding that should have been done when the FilePath + - was obtained. -} +withFilePath :: FilePath -> (CString -> IO a) -> IO a +withFilePath fp f = Encoding.getFileSystemEncoding + >>= \enc -> GHC.withCString enc fp f + +{- Encodes a FilePath into a String, applying the filesystem encoding. + - + - There are very few things it makes sense to do with such an encoded + - string. It's not a legal filename; it should not be displayed. + - So this function is not exported, but instead used by the few functions + - that can usefully consume it. + - + - This use of unsafePerformIO is belived to be safe; GHC's interface + - only allows doing this conversion with CStrings, and the CString buffer + - is allocated, used, and deallocated within the call, with no side + - effects. + -} +{-# NOINLINE _encodeFilePath #-} +_encodeFilePath :: FilePath -> String +_encodeFilePath fp = unsafePerformIO $ do + enc <- Encoding.getFileSystemEncoding + GHC.withCString enc fp $ GHC.peekCString Encoding.char8 + +{- Encodes a FilePath into a Md5.Str, applying the filesystem encoding. -} +md5FilePath :: FilePath -> MD5.Str +md5FilePath = MD5.Str . _encodeFilePath + +{- Converts a [Word8] to a FilePath, encoding using the filesystem encoding. + - + - w82c produces a String, which may contain Chars that are invalid + - unicode. From there, this is really a simple matter of applying the + - file system encoding, only complicated by GHC's interface to doing so. + -} +{-# NOINLINE encodeW8 #-} +encodeW8 :: [Word8] -> FilePath +encodeW8 w8 = unsafePerformIO $ do + enc <- Encoding.getFileSystemEncoding + GHC.withCString Encoding.char8 (w82s w8) $ GHC.peekCString enc + +{- Useful when you want the actual number of bytes that will be used to + - represent the FilePath on disk. -} +decodeW8 :: FilePath -> [Word8] +decodeW8 = s2w8 . _encodeFilePath + +{- Truncates a FilePath to the given number of bytes (or less), + - as represented on disk. + - + - Avoids returning an invalid part of a unicode byte sequence, at the + - cost of efficiency when running on a large FilePath. + -} +truncateFilePath :: Int -> FilePath -> FilePath +truncateFilePath n = go . reverse + where + go f = + let bytes = decodeW8 f + in if length bytes <= n + then reverse f + else go (drop 1 f) |