From 7e592e1d6ed5e0b25b37215da7558c6324688d6f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 22 Nov 2013 11:16:03 -0400 Subject: git-repair (1.20131122) unstable; urgency=low * Added test mode, which can be used to randomly corrupt test repositories, in reproducible ways, which allows easy corruption-driven-development. * Improve repair code in the case where the index file is corrupt, and this hides other problems. * Write a dummy .git/HEAD if the file is missing or corrupt, as git otherwise will not treat the repository as a git repo. * Improve fsck code to find badly corrupted objects that crash git fsck before it can complain about them. * Fixed crashes on bad file encodings. * Can now run 10000 tests (git-repair --test -n 10000 --force) with 0 failures. # imported from the archive --- Utility/FileSystemEncoding.hs | 93 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 Utility/FileSystemEncoding.hs (limited to 'Utility/FileSystemEncoding.hs') diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs new file mode 100644 index 0000000..ac105e7 --- /dev/null +++ b/Utility/FileSystemEncoding.hs @@ -0,0 +1,93 @@ +{- GHC File system encoding handling. + - + - Copyright 2012-2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Utility.FileSystemEncoding ( + fileEncoding, + withFilePath, + md5FilePath, + decodeW8, + encodeW8, + truncateFilePath, +) where + +import qualified GHC.Foreign as GHC +import qualified GHC.IO.Encoding as Encoding +import Foreign.C +import System.IO +import System.IO.Unsafe +import qualified Data.Hash.MD5 as MD5 +import Data.Word +import Data.Bits.Utils + +{- Sets a Handle to use the filesystem encoding. This causes data + - written or read from it to be encoded/decoded the same + - as ghc 7.4 does to filenames etc. This special encoding + - allows "arbitrary undecodable bytes to be round-tripped through it". -} +fileEncoding :: Handle -> IO () +fileEncoding h = hSetEncoding h =<< Encoding.getFileSystemEncoding + +{- Marshal a Haskell FilePath into a NUL terminated C string using temporary + - storage. The FilePath is encoded using the filesystem encoding, + - reversing the decoding that should have been done when the FilePath + - was obtained. -} +withFilePath :: FilePath -> (CString -> IO a) -> IO a +withFilePath fp f = Encoding.getFileSystemEncoding + >>= \enc -> GHC.withCString enc fp f + +{- Encodes a FilePath into a String, applying the filesystem encoding. + - + - There are very few things it makes sense to do with such an encoded + - string. It's not a legal filename; it should not be displayed. + - So this function is not exported, but instead used by the few functions + - that can usefully consume it. + - + - This use of unsafePerformIO is belived to be safe; GHC's interface + - only allows doing this conversion with CStrings, and the CString buffer + - is allocated, used, and deallocated within the call, with no side + - effects. + -} +{-# NOINLINE _encodeFilePath #-} +_encodeFilePath :: FilePath -> String +_encodeFilePath fp = unsafePerformIO $ do + enc <- Encoding.getFileSystemEncoding + GHC.withCString enc fp $ GHC.peekCString Encoding.char8 + +{- Encodes a FilePath into a Md5.Str, applying the filesystem encoding. -} +md5FilePath :: FilePath -> MD5.Str +md5FilePath = MD5.Str . _encodeFilePath + +{- Converts a [Word8] to a FilePath, encoding using the filesystem encoding. + - + - w82c produces a String, which may contain Chars that are invalid + - unicode. From there, this is really a simple matter of applying the + - file system encoding, only complicated by GHC's interface to doing so. + -} +{-# NOINLINE encodeW8 #-} +encodeW8 :: [Word8] -> FilePath +encodeW8 w8 = unsafePerformIO $ do + enc <- Encoding.getFileSystemEncoding + GHC.withCString Encoding.char8 (w82s w8) $ GHC.peekCString enc + +{- Useful when you want the actual number of bytes that will be used to + - represent the FilePath on disk. -} +decodeW8 :: FilePath -> [Word8] +decodeW8 = s2w8 . _encodeFilePath + +{- Truncates a FilePath to the given number of bytes (or less), + - as represented on disk. + - + - Avoids returning an invalid part of a unicode byte sequence, at the + - cost of efficiency when running on a large FilePath. + -} +truncateFilePath :: Int -> FilePath -> FilePath +truncateFilePath n = go . reverse + where + go f = + let bytes = decodeW8 f + in if length bytes <= n + then reverse f + else go (drop 1 f) -- cgit v1.2.3