From edf83982be214f3c839fab9b659f645de53a9100 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 14 Aug 2023 12:06:32 -0400 Subject: merge from git-annex Support building with unix-compat 0.7 --- CHANGELOG | 7 ++ Common.hs | 2 +- Git.hs | 16 ++--- Git/CatFile.hs | 4 +- Git/Config.hs | 95 +++++++++++++++++++++------ Git/Construct.hs | 42 +++++++----- Git/CurrentRepo.hs | 7 +- Git/Destroyer.hs | 14 ++-- Git/FilePath.hs | 10 +-- Git/Filename.hs | 49 -------------- Git/HashObject.hs | 43 ++++++++---- Git/LsFiles.hs | 2 +- Git/LsTree.hs | 4 +- Git/Quote.hs | 122 ++++++++++++++++++++++++++++++++++ Git/Remote.hs | 20 +++--- Git/Repair.hs | 12 ++-- Git/Sha.hs | 2 +- Git/Types.hs | 2 + Git/UpdateIndex.hs | 40 +++++++----- Utility/CopyFile.hs | 13 ++-- Utility/DataUnits.hs | 56 +++++++++++----- Utility/Directory.hs | 10 +-- Utility/Directory/Create.hs | 51 ++++++++------- Utility/Exception.hs | 27 +++++--- Utility/FileMode.hs | 38 ++++++----- Utility/FileSize.hs | 6 +- Utility/Format.hs | 149 ++++++++++++++++++++++++++---------------- Utility/InodeCache.hs | 16 +++-- Utility/Metered.hs | 7 +- Utility/Misc.hs | 10 ++- Utility/Monad.hs | 8 +++ Utility/MoveFile.hs | 25 ++++--- Utility/Path.hs | 5 +- Utility/Path/AbsRel.hs | 2 +- Utility/Process.hs | 7 +- Utility/Process/Transcript.hs | 97 +++++++++++++++++++++++++++ Utility/QuickCheck.hs | 1 + Utility/RawFilePath.hs | 59 +++++++++++++---- Utility/SafeOutput.hs | 36 ++++++++++ Utility/SystemDirectory.hs | 2 +- Utility/Tmp.hs | 7 +- Utility/Url/Parse.hs | 63 ++++++++++++++++++ Utility/UserInfo.hs | 27 ++++---- git-repair.cabal | 7 +- 44 files changed, 882 insertions(+), 340 deletions(-) delete mode 100644 Git/Filename.hs create mode 100644 Git/Quote.hs create mode 100644 Utility/Process/Transcript.hs create mode 100644 Utility/SafeOutput.hs create mode 100644 Utility/Url/Parse.hs diff --git a/CHANGELOG b/CHANGELOG index 737693b..3abf0d8 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,10 @@ +git-repair (1.20230814) UNRELEASED; urgency=medium + + * Merge from git-annex. + * Support building with unix-compat 0.7 + + -- Joey Hess Mon, 14 Aug 2023 12:06:46 -0400 + git-repair (1.20220404) unstable; urgency=medium * Avoid treating refs that are not commit objects as evidence of diff --git a/Common.hs b/Common.hs index 5a658a6..ebe6d3f 100644 --- a/Common.hs +++ b/Common.hs @@ -18,7 +18,7 @@ import System.IO as X hiding (FilePath) import System.Posix.IO as X hiding (createPipe) #endif import System.Exit as X -import System.PosixCompat.Files as X +import System.PosixCompat.Files as X (FileStatus) import Utility.Misc as X import Utility.Exception as X diff --git a/Git.hs b/Git.hs index f8eedc0..e567917 100644 --- a/Git.hs +++ b/Git.hs @@ -1,6 +1,6 @@ {- git repository handling - - - This is written to be completely independant of git-annex and should be + - This is written to be completely independent of git-annex and should be - suitable for other uses. - - Copyright 2010-2021 Joey Hess @@ -68,18 +68,18 @@ repoLocation Repo { location = UnparseableUrl url } = url repoLocation Repo { location = Local { worktree = Just dir } } = fromRawFilePath dir repoLocation Repo { location = Local { gitdir = dir } } = fromRawFilePath dir repoLocation Repo { location = LocalUnknown dir } = fromRawFilePath dir -repoLocation Repo { location = Unknown } = error "unknown repoLocation" +repoLocation Repo { location = Unknown } = giveup "unknown repoLocation" {- Path to a repository. For non-bare, this is the worktree, for bare, - - it's the gitdit, and for URL repositories, is the path on the remote + - it's the gitdir, and for URL repositories, is the path on the remote - host. -} repoPath :: Repo -> RawFilePath repoPath Repo { location = Url u } = toRawFilePath $ unEscapeString $ uriPath u repoPath Repo { location = Local { worktree = Just d } } = d repoPath Repo { location = Local { gitdir = d } } = d repoPath Repo { location = LocalUnknown dir } = dir -repoPath Repo { location = Unknown } = error "unknown repoPath" -repoPath Repo { location = UnparseableUrl _u } = error "unknwon repoPath" +repoPath Repo { location = Unknown } = giveup "unknown repoPath" +repoPath Repo { location = UnparseableUrl _u } = giveup "unknown repoPath" repoWorkTree :: Repo -> Maybe RawFilePath repoWorkTree Repo { location = Local { worktree = Just d } } = Just d @@ -88,7 +88,7 @@ repoWorkTree _ = Nothing {- Path to a local repository's .git directory. -} localGitDir :: Repo -> RawFilePath localGitDir Repo { location = Local { gitdir = d } } = d -localGitDir _ = error "unknown localGitDir" +localGitDir _ = giveup "unknown localGitDir" {- Some code needs to vary between URL and normal repos, - or bare and non-bare, these functions help with that. -} @@ -129,7 +129,7 @@ repoIsLocalUnknown _ = False assertLocal :: Repo -> a -> a assertLocal repo action - | repoIsUrl repo = error $ unwords + | repoIsUrl repo = giveup $ unwords [ "acting on non-local git repo" , repoDescribe repo , "not supported" @@ -156,7 +156,7 @@ hookPath script repo = do #if mingw32_HOST_OS isexecutable f = doesFileExist f #else - isexecutable f = isExecutable . fileMode <$> getFileStatus f + isexecutable f = isExecutable . fileMode <$> getSymbolicLinkStatus f #endif {- Makes the path to a local Repo be relative to the cwd. -} diff --git a/Git/CatFile.hs b/Git/CatFile.hs index f33ad49..daa41ad 100644 --- a/Git/CatFile.hs +++ b/Git/CatFile.hs @@ -120,7 +120,7 @@ catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \f content <- readObjectContent from r return $ Just (content, sha, objtype) Just DNE -> return Nothing - Nothing -> error $ "unknown response from git cat-file " ++ show (header, object) + Nothing -> giveup $ "unknown response from git cat-file " ++ show (header, object) where -- Slow fallback path for filenames containing newlines. newlinefallback = queryObjectType object (catFileGitRepo h) >>= \case @@ -144,7 +144,7 @@ readObjectContent h (ParsedResp _ _ size) = do eatchar expected = do c <- hGetChar h when (c /= expected) $ - error $ "missing " ++ (show expected) ++ " from git cat-file" + giveup $ "missing " ++ (show expected) ++ " from git cat-file" readObjectContent _ DNE = error "internal" {- Gets the size and type of an object, without reading its content. -} diff --git a/Git/Config.hs b/Git/Config.hs index 5deba6b..4ff3454 100644 --- a/Git/Config.hs +++ b/Git/Config.hs @@ -1,6 +1,6 @@ {- git repository configuration handling - - - Copyright 2010-2020 Joey Hess + - Copyright 2010-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -22,6 +22,8 @@ import Git.Types import qualified Git.Command import qualified Git.Construct import Utility.UserInfo +import Utility.Process.Transcript +import Utility.Debug {- Returns a single git config setting, or a fallback value if not set. -} get :: ConfigKey -> ConfigValue -> Repo -> ConfigValue @@ -55,12 +57,22 @@ reRead r = read' $ r read' :: Repo -> IO Repo read' repo = go repo where - go Repo { location = Local { gitdir = d } } = git_config d - go Repo { location = LocalUnknown d } = git_config d + -- Passing --git-dir changes git's behavior when run in a + -- repository belonging to another user. When the git directory + -- was explicitly specified, pass that in order to get the local + -- git config. + go Repo { location = Local { gitdir = d } } + | gitDirSpecifiedExplicitly repo = git_config ["--git-dir=."] d + -- Run in worktree when there is one, since running in the .git + -- directory will trigger safe.bareRepository=explicit, even + -- when not in a bare repository. + go Repo { location = Local { worktree = Just d } } = git_config [] d + go Repo { location = Local { gitdir = d } } = git_config [] d + go Repo { location = LocalUnknown d } = git_config [] d go _ = assertLocal repo $ error "internal" - git_config d = withCreateProcess p (git_config' p) + git_config addparams d = withCreateProcess p (git_config' p) where - params = ["config", "--null", "--list"] + params = addparams ++ ["config", "--null", "--list"] p = (proc "git" params) { cwd = Just (fromRawFilePath d) , env = gitEnv repo @@ -94,19 +106,23 @@ global = do hRead :: Repo -> ConfigStyle -> Handle -> IO Repo hRead repo st h = do val <- S.hGetContents h - store val st repo + let c = parse val st + debug (DebugSource "Git.Config") $ "git config read: " ++ + show (map (\(k, v) -> (show k, map show v)) (M.toList c)) + storeParsed c repo {- Stores a git config into a Repo, returning the new version of the Repo. - The git config may be multiple lines, or a single line. - Config settings can be updated incrementally. -} store :: S.ByteString -> ConfigStyle -> Repo -> IO Repo -store s st repo = do - let c = parse s st - updateLocation $ repo - { config = (M.map Prelude.head c) `M.union` config repo - , fullconfig = M.unionWith (++) c (fullconfig repo) - } +store s st = storeParsed (parse s st) + +storeParsed :: M.Map ConfigKey [ConfigValue] -> Repo -> IO Repo +storeParsed c repo = updateLocation $ repo + { config = (M.map Prelude.head c) `M.union` config repo + , fullconfig = M.unionWith (++) c (fullconfig repo) + } {- Stores a single config setting in a Repo, returning the new version of - the Repo. Config settings can be updated incrementally. -} @@ -123,14 +139,28 @@ store' k v repo = repo - based on the core.bare and core.worktree settings. -} updateLocation :: Repo -> IO Repo -updateLocation r@(Repo { location = LocalUnknown d }) - | isBare r = ifM (doesDirectoryExist (fromRawFilePath dotgit)) - ( updateLocation' r $ Local dotgit Nothing - , updateLocation' r $ Local d Nothing - ) - | otherwise = updateLocation' r $ Local dotgit (Just d) +updateLocation r@(Repo { location = LocalUnknown d }) = case isBare r of + Just True -> ifM (doesDirectoryExist (fromRawFilePath dotgit)) + ( updateLocation' r $ Local dotgit Nothing + , updateLocation' r $ Local d Nothing + ) + Just False -> mknonbare + {- core.bare not in config, probably because safe.directory + - did not allow reading the config -} + Nothing -> ifM (Git.Construct.isBareRepo (fromRawFilePath d)) + ( mkbare + , mknonbare + ) where dotgit = d P. ".git" + -- git treats eg ~/foo as a bare git repository located in + -- ~/foo/.git if ~/foo/.git/config has core.bare=true + mkbare = ifM (doesDirectoryExist (fromRawFilePath dotgit)) + ( updateLocation' r $ Local dotgit Nothing + , updateLocation' r $ Local d Nothing + ) + mknonbare = updateLocation' r $ Local dotgit (Just d) + updateLocation r@(Repo { location = l@(Local {}) }) = updateLocation' r l updateLocation r = return r @@ -202,8 +232,9 @@ boolConfig' :: Bool -> S.ByteString boolConfig' True = "true" boolConfig' False = "false" -isBare :: Repo -> Bool -isBare r = fromMaybe False $ isTrueFalse' =<< getMaybe coreBare r +{- Note that repoIsLocalBare is often better to use than this. -} +isBare :: Repo -> Maybe Bool +isBare r = isTrueFalse' =<< getMaybe coreBare r coreBare :: ConfigKey coreBare = "core.bare" @@ -273,3 +304,27 @@ unset ck@(ConfigKey k) r = ifM (Git.Command.runBool ps r) ) where ps = [Param "config", Param "--unset-all", Param (decodeBS k)] + +{- git "fixed" CVE-2022-24765 by preventing git-config from + - listing per-repo configs when the repo is not owned by + - the current user. Detect if this fix is in effect for the + - repo. + -} +checkRepoConfigInaccessible :: Repo -> IO Bool +checkRepoConfigInaccessible r + -- When --git-dir or GIT_DIR is used to specify the git + -- directory, git does not check for CVE-2022-24765. + | gitDirSpecifiedExplicitly r = return False + | otherwise = do + -- Cannot use gitCommandLine here because specifying --git-dir + -- will bypass the git security check. + let p = (proc "git" ["config", "--local", "--list"]) + { cwd = Just (fromRawFilePath (repoPath r)) + , env = gitEnv r + } + (out, ok) <- processTranscript' p Nothing + if not ok + then do + debug (DebugSource "Git.Config") ("config output: " ++ out) + return True + else return False diff --git a/Git/Construct.hs b/Git/Construct.hs index a5e825e..bdab8ed 100644 --- a/Git/Construct.hs +++ b/Git/Construct.hs @@ -1,6 +1,6 @@ {- Construction of Git Repo objects - - - Copyright 2010-2021 Joey Hess + - Copyright 2010-2023 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -23,6 +23,7 @@ module Git.Construct ( checkForRepo, newFrom, adjustGitDirFile, + isBareRepo, ) where #ifndef mingw32_HOST_OS @@ -38,6 +39,7 @@ import Git.Remote import Git.FilePath import qualified Git.Url as Url import Utility.UserInfo +import Utility.Url.Parse import qualified Data.ByteString as B import qualified System.FilePath.ByteString as P @@ -84,7 +86,7 @@ fromAbsPath :: RawFilePath -> IO Repo fromAbsPath dir | absoluteGitPath dir = fromPath dir | otherwise = - error $ "internal error, " ++ show dir ++ " is not absolute" + giveup $ "internal error, " ++ show dir ++ " is not absolute" {- Construct a Repo for a remote's url. - @@ -103,10 +105,10 @@ fromUrl url fromUrl' :: String -> IO Repo fromUrl' url - | "file://" `isPrefixOf` url = case parseURI url of + | "file://" `isPrefixOf` url = case parseURIPortable url of Just u -> fromAbsPath $ toRawFilePath $ unEscapeString $ uriPath u Nothing -> pure $ newFrom $ UnparseableUrl url - | otherwise = case parseURI url of + | otherwise = case parseURIPortable url of Just u -> pure $ newFrom $ Url u Nothing -> pure $ newFrom $ UnparseableUrl url @@ -128,7 +130,7 @@ localToUrl reference r , auth , fromRawFilePath (repoPath r) ] - in r { location = Url $ fromJust $ parseURI absurl } + in r { location = Url $ fromJust $ parseURIPortable absurl } _ -> r {- Calculates a list of a repo's configured remotes, by parsing its config. -} @@ -139,7 +141,7 @@ fromRemotes repo = catMaybes <$> mapM construct remotepairs filterkeys f = filterconfig (\(k,_) -> f k) remotepairs = filterkeys isRemoteUrlKey construct (k,v) = remoteNamedFromKey k $ - fromRemoteLocation (fromConfigValue v) repo + fromRemoteLocation (fromConfigValue v) False repo {- Sets the name of a remote when constructing the Repo to represent it. -} remoteNamed :: String -> IO Repo -> IO Repo @@ -155,9 +157,15 @@ remoteNamedFromKey k r = case remoteKeyToRemoteName k of Just n -> Just <$> remoteNamed n r {- Constructs a new Repo for one of a Repo's remotes using a given - - location (ie, an url). -} -fromRemoteLocation :: String -> Repo -> IO Repo -fromRemoteLocation s repo = gen $ parseRemoteLocation s repo + - location (ie, an url). + - + - knownurl can be true if the location is known to be an url. This allows + - urls that don't parse as urls to be used, returning UnparseableUrl. + - If knownurl is false, the location may still be an url, if it parses as + - one. + -} +fromRemoteLocation :: String -> Bool -> Repo -> IO Repo +fromRemoteLocation s knownurl repo = gen $ parseRemoteLocation s knownurl repo where gen (RemotePath p) = fromRemotePath p repo gen (RemoteUrl u) = fromUrl u @@ -216,7 +224,7 @@ checkForRepo :: FilePath -> IO (Maybe RepoLocation) checkForRepo dir = check isRepo $ check (checkGitDirFile (toRawFilePath dir)) $ - check isBareRepo $ + check (checkdir (isBareRepo dir)) $ return Nothing where check test cont = maybe cont (return . Just) =<< test @@ -225,16 +233,17 @@ checkForRepo dir = , return Nothing ) isRepo = checkdir $ - gitSignature (".git" "config") + doesFileExist (dir ".git" "config") <||> - -- A git-worktree lacks .git/config, but has .git/commondir. + -- A git-worktree lacks .git/config, but has .git/gitdir. -- (Normally the .git is a file, not a symlink, but it can -- be converted to a symlink and git will still work; -- this handles that case.) - gitSignature (".git" "gitdir") - isBareRepo = checkdir $ gitSignature "config" - <&&> doesDirectoryExist (dir "objects") - gitSignature file = doesFileExist $ dir file + doesFileExist (dir ".git" "gitdir") + +isBareRepo :: FilePath -> IO Bool +isBareRepo dir = doesFileExist (dir "config") + <&&> doesDirectoryExist (dir "objects") -- Check for a .git file. checkGitDirFile :: RawFilePath -> IO (Maybe RepoLocation) @@ -277,5 +286,6 @@ newFrom l = Repo , gitEnv = Nothing , gitEnvOverridesGitDir = False , gitGlobalOpts = [] + , gitDirSpecifiedExplicitly = False } diff --git a/Git/CurrentRepo.hs b/Git/CurrentRepo.hs index 9261eab..54e05f4 100644 --- a/Git/CurrentRepo.hs +++ b/Git/CurrentRepo.hs @@ -1,6 +1,6 @@ {- The current git repository. - - - Copyright 2012-2020 Joey Hess + - Copyright 2012-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -79,8 +79,9 @@ get = do { gitdir = absd , worktree = Just curr } - r <- Git.Config.read $ newFrom loc - return $ if Git.Config.isBare r + r <- Git.Config.read $ (newFrom loc) + { gitDirSpecifiedExplicitly = True } + return $ if fromMaybe False (Git.Config.isBare r) then r { location = (location r) { worktree = Nothing } } else r configure Nothing Nothing = giveup "Not in a git repository." diff --git a/Git/Destroyer.hs b/Git/Destroyer.hs index 4d84eec..9b75178 100644 --- a/Git/Destroyer.hs +++ b/Git/Destroyer.hs @@ -18,7 +18,9 @@ import Git import Utility.QuickCheck import Utility.FileMode import Utility.Tmp +import qualified Utility.RawFilePath as R +import System.PosixCompat.Files import qualified Data.ByteString as B import Data.Word @@ -95,12 +97,12 @@ applyDamage ds r = do case d of Empty s -> withfile s $ \f -> withSaneMode f $ do - removeWhenExistsWith removeLink f + removeWhenExistsWith R.removeLink (toRawFilePath f) writeFile f "" Reverse s -> withfile s $ \f -> withSaneMode f $ B.writeFile f =<< B.reverse <$> B.readFile f - Delete s -> withfile s $ removeWhenExistsWith removeLink + Delete s -> withfile s $ removeWhenExistsWith R.removeLink . toRawFilePath AppendGarbage s garbage -> withfile s $ \f -> withSaneMode f $ @@ -127,15 +129,15 @@ applyDamage ds r = do ] ScrambleFileMode s mode -> withfile s $ \f -> - setFileMode f mode + R.setFileMode (toRawFilePath f) mode SwapFiles a b -> withfile a $ \fa -> withfile b $ \fb -> unless (fa == fb) $ withTmpFile "swap" $ \tmp _ -> do - moveFile fa tmp - moveFile fb fa - moveFile tmp fa + moveFile (toRawFilePath fa) (toRawFilePath tmp) + moveFile (toRawFilePath fb) (toRawFilePath fa) + moveFile (toRawFilePath tmp) (toRawFilePath fa) where -- A broken .git/config is not recoverable. -- Don't damage hook scripts, to avoid running arbitrary code. ;) diff --git a/Git/FilePath.hs b/Git/FilePath.hs index feed8f6..b27c0c7 100644 --- a/Git/FilePath.hs +++ b/Git/FilePath.hs @@ -5,7 +5,7 @@ - top of the repository even when run in a subdirectory. Adding some - types helps keep that straight. - - - Copyright 2012-2019 Joey Hess + - Copyright 2012-2023 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -30,12 +30,12 @@ module Git.FilePath ( import Common import Git +import Git.Quote import qualified System.FilePath.ByteString as P import qualified System.FilePath.Posix.ByteString import GHC.Generics import Control.DeepSeq -import qualified Data.ByteString as S {- A RawFilePath, relative to the top of the git repository. -} newtype TopFilePath = TopFilePath { getTopFilePath :: RawFilePath } @@ -46,11 +46,11 @@ instance NFData TopFilePath {- A file in a branch or other treeish. -} data BranchFilePath = BranchFilePath Ref TopFilePath deriving (Show, Eq, Ord) - + {- Git uses the branch:file form to refer to a BranchFilePath -} -descBranchFilePath :: BranchFilePath -> S.ByteString +descBranchFilePath :: BranchFilePath -> StringContainingQuotedPath descBranchFilePath (BranchFilePath b f) = - fromRef' b <> ":" <> getTopFilePath f + UnquotedByteString (fromRef' b) <> ":" <> QuotedPath (getTopFilePath f) {- Path to a TopFilePath, within the provided git repo. -} fromTopFilePath :: TopFilePath -> Git.Repo -> RawFilePath diff --git a/Git/Filename.hs b/Git/Filename.hs deleted file mode 100644 index 2fa4c59..0000000 --- a/Git/Filename.hs +++ /dev/null @@ -1,49 +0,0 @@ -{- Some git commands output encoded filenames, in a rather annoyingly complex - - C-style encoding. - - - - Copyright 2010, 2011 Joey Hess - - - - Licensed under the GNU AGPL version 3 or higher. - -} - -module Git.Filename where - -import Common -import Utility.Format (decode_c, encode_c) -import Utility.QuickCheck - -import Data.Char -import Data.Word -import qualified Data.ByteString as S - --- encoded filenames will be inside double quotes -decode :: S.ByteString -> RawFilePath -decode b = case S.uncons b of - Nothing -> b - Just (h, t) - | h /= q -> b - | otherwise -> case S.unsnoc t of - Nothing -> b - Just (i, l) - | l /= q -> b - | otherwise -> - encodeBS $ decode_c $ decodeBS i - where - q :: Word8 - q = fromIntegral (ord '"') - -{- Should not need to use this, except for testing decode. -} -encode :: RawFilePath -> S.ByteString -encode s = encodeBS $ "\"" ++ encode_c (decodeBS s) ++ "\"" - --- Encoding and then decoding roundtrips only when the string does not --- contain high unicode, because eg, both "\12345" and "\227\128\185" --- are encoded to "\343\200\271". --- --- That is not a real-world problem, and using TestableFilePath --- limits what's tested to ascii, so avoids running into it. -prop_encode_decode_roundtrip :: TestableFilePath -> Bool -prop_encode_decode_roundtrip ts = - s == fromRawFilePath (decode (encode (toRawFilePath s))) - where - s = fromTestableFilePath ts diff --git a/Git/HashObject.hs b/Git/HashObject.hs index 98bd440..1474c57 100644 --- a/Git/HashObject.hs +++ b/Git/HashObject.hs @@ -1,6 +1,6 @@ {- git hash-object interface - - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2023 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -21,26 +21,47 @@ import qualified Data.ByteString as S import qualified Data.ByteString.Char8 as S8 import qualified Data.ByteString.Lazy as L import Data.ByteString.Builder +import Data.Char -type HashObjectHandle = CoProcess.CoProcessHandle +data HashObjectHandle = HashObjectHandle CoProcess.CoProcessHandle Repo [CommandParam] hashObjectStart :: Bool -> Repo -> IO HashObjectHandle -hashObjectStart writeobject = gitCoProcessStart True $ catMaybes - [ Just (Param "hash-object") - , if writeobject then Just (Param "-w") else Nothing - , Just (Param "--stdin-paths") - , Just (Param "--no-filters") - ] +hashObjectStart writeobject repo = do + h <- gitCoProcessStart True (ps ++ [Param "--stdin-paths"]) repo + return (HashObjectHandle h repo ps) + where + ps = catMaybes + [ Just (Param "hash-object") + , if writeobject then Just (Param "-w") else Nothing + , Just (Param "--no-filters") + ] hashObjectStop :: HashObjectHandle -> IO () -hashObjectStop = CoProcess.stop +hashObjectStop (HashObjectHandle h _ _) = CoProcess.stop h {- Injects a file into git, returning the Sha of the object. -} hashFile :: HashObjectHandle -> RawFilePath -> IO Sha -hashFile h file = CoProcess.query h send receive +hashFile hdl@(HashObjectHandle h _ _) file = do + -- git hash-object chdirs to the top of the repository on + -- start, so if the filename is relative, it will + -- not work. This seems likely to be a git bug. + -- So, make the filename absolute, which will work now + -- and also if git's behavior later changes. + file' <- absPath file + if newline `S.elem` file' + then hashFile' hdl file + else CoProcess.query h (send file') receive where - send to = S8.hPutStrLn to =<< absPath file + send file' to = S8.hPutStrLn to file' receive from = getSha "hash-object" $ S8.hGetLine from + newline = fromIntegral (ord '\n') + +{- Runs git hash-object once per call, rather than using a running + - one, so is slower. But, is able to handle newlines in the filepath, + - which --stdin-paths cannot. -} +hashFile' :: HashObjectHandle -> RawFilePath -> IO Sha +hashFile' (HashObjectHandle _ repo ps) file = getSha "hash-object" $ + pipeReadStrict (ps ++ [File (fromRawFilePath file)]) repo class HashableBlob t where hashableBlobToHandle :: Handle -> t -> IO () diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs index cc824f2..4eea395 100644 --- a/Git/LsFiles.hs +++ b/Git/LsFiles.hs @@ -325,7 +325,7 @@ reduceUnmerged c (i:is) = reduceUnmerged (new:c) rest && isus x || isus y && not (isus x && isus y) -{- Gets the InodeCache equivilant information stored in the git index. +{- Gets the InodeCache equivalent information stored in the git index. - - Note that this uses a --debug option whose output could change at some - point in the future. If the output is not as expected, will use Nothing. diff --git a/Git/LsTree.hs b/Git/LsTree.hs index fb3b3e1..9129d18 100644 --- a/Git/LsTree.hs +++ b/Git/LsTree.hs @@ -23,7 +23,7 @@ import Common import Git import Git.Command import Git.FilePath -import qualified Git.Filename +import qualified Git.Quote import Utility.Attoparsec import Numeric @@ -137,7 +137,7 @@ parserLsTree long = case long of -- sha <*> (Ref <$> A8.takeTill A8.isSpace) - fileparser = asTopFilePath . Git.Filename.decode <$> A.takeByteString + fileparser = asTopFilePath . Git.Quote.unquote <$> A.takeByteString sizeparser = fmap Just A8.decimal diff --git a/Git/Quote.hs b/Git/Quote.hs new file mode 100644 index 0000000..2ca442e --- /dev/null +++ b/Git/Quote.hs @@ -0,0 +1,122 @@ +{- Some git commands output quoted filenames, in a rather annoyingly complex + - C-style encoding. + - + - Copyright 2010-2023 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings, TypeSynonymInstances #-} + +module Git.Quote ( + unquote, + quote, + noquote, + QuotePath(..), + StringContainingQuotedPath(..), + quotedPaths, + prop_quote_unquote_roundtrip, +) where + +import Common +import Utility.Format (decode_c, encode_c, encode_c', isUtf8Byte) +import Utility.QuickCheck +import Utility.SafeOutput + +import Data.Char +import Data.Word +import Data.String +import qualified Data.ByteString as S +import qualified Data.Semigroup as Sem +import Prelude + +unquote :: S.ByteString -> RawFilePath +unquote b = case S.uncons b of + Nothing -> b + Just (h, t) + | h /= q -> b + | otherwise -> case S.unsnoc t of + Nothing -> b + Just (i, l) + | l /= q -> b + | otherwise -> decode_c i + where + q :: Word8 + q = fromIntegral (ord '"') + +-- always encodes and double quotes, even in cases that git does not +quoteAlways :: RawFilePath -> S.ByteString +quoteAlways s = "\"" <> encode_c needencode s <> "\"" + where + needencode c = isUtf8Byte c || c == fromIntegral (ord '"') + +-- git config core.quotePath controls whether to quote unicode characters +newtype QuotePath = QuotePath Bool + +class Quoteable t where + -- double quotes and encodes when git would + quote :: QuotePath -> t -> S.ByteString + + noquote :: t -> S.ByteString + +instance Quoteable RawFilePath where + quote (QuotePath qp) s = case encode_c' needencode s of + Nothing -> s + Just s' -> "\"" <> s' <> "\"" + where + needencode c + | c == fromIntegral (ord '"') = True + | qp = isUtf8Byte c + | otherwise = False + + noquote = id + +-- Allows building up a string that contains paths, which will get quoted. +-- With OverloadedStrings, strings are passed through without quoting. +-- Eg: QuotedPath f <> ": not found" +data StringContainingQuotedPath + = UnquotedString String + | UnquotedByteString S.ByteString + | QuotedPath RawFilePath + | StringContainingQuotedPath :+: StringContainingQuotedPath + deriving (Show, Eq) + +quotedPaths :: [RawFilePath] -> StringContainingQuotedPath +quotedPaths [] = mempty +quotedPaths (p:ps) = QuotedPath p <> if null ps + then mempty + else " " <> quotedPaths ps + +instance Quoteable StringContainingQuotedPath where + quote _ (UnquotedString s) = safeOutput (encodeBS s) + quote _ (UnquotedByteString s) = safeOutput s + quote qp (QuotedPath p) = quote qp p + quote qp (a :+: b) = quote qp a <> quote qp b + + noquote (UnquotedString s) = encodeBS s + noquote (UnquotedByteString s) = s + noquote (QuotedPath p) = p + noquote (a :+: b) = noquote a <> noquote b + +instance IsString StringContainingQuotedPath where + fromString = UnquotedByteString . encodeBS + +instance Sem.Semigroup StringContainingQuotedPath where + UnquotedString a <> UnquotedString b = UnquotedString (a <> b) + UnquotedByteString a <> UnquotedByteString b = UnquotedByteString (a <> b) + a <> b = a :+: b + +instance Monoid StringContainingQuotedPath where + mempty = UnquotedByteString mempty + +-- Encoding and then decoding roundtrips only when the string does not +-- contain high unicode, because eg, both "\12345" and "\227\128\185" +-- are encoded to "\343\200\271". +-- +-- That is not a real-world problem, and using TestableFilePath +-- limits what's tested to ascii, so avoids running into it. +prop_quote_unquote_roundtrip :: TestableFilePath -> Bool +prop_quote_unquote_roundtrip ts = + s == fromRawFilePath (unquote (quoteAlways (toRawFilePath s))) + where + s = fromTestableFilePath ts diff --git a/Git/Remote.hs b/Git/Remote.hs index 80accca..9cdaad6 100644 --- a/Git/Remote.hs +++ b/Git/Remote.hs @@ -43,7 +43,7 @@ remoteKeyToRemoteName (ConfigKey k) {- Construct a legal git remote name out of an arbitrary input string. - - There seems to be no formal definition of this in the git source, - - just some ad-hoc checks, and some other things that fail with certian + - just some ad-hoc checks, and some other things that fail with certain - types of names (like ones starting with '-'). -} makeLegalName :: String -> RemoteName @@ -63,7 +63,7 @@ makeLegalName s = case filter legal $ replace "/" "_" s of legal c = isAlphaNum c data RemoteLocation = RemoteUrl String | RemotePath FilePath - deriving (Eq) + deriving (Eq, Show) remoteLocationIsUrl :: RemoteLocation -> Bool remoteLocationIsUrl (RemoteUrl _) = True @@ -75,16 +75,18 @@ remoteLocationIsSshUrl _ = False {- Determines if a given remote location is an url, or a local - path. Takes the repository's insteadOf configuration into account. -} -parseRemoteLocation :: String -> Repo -> RemoteLocation -parseRemoteLocation s repo = ret $ calcloc s +parseRemoteLocation :: String -> Bool -> Repo -> RemoteLocation +parseRemoteLocation s knownurl repo = go where - ret v + s' = calcloc s + go #ifdef mingw32_HOST_OS - | dosstyle v = RemotePath (dospath v) + | dosstyle s' = RemotePath (dospath s') #endif - | scpstyle v = RemoteUrl (scptourl v) - | urlstyle v = RemoteUrl v - | otherwise = RemotePath v + | scpstyle s' = RemoteUrl (scptourl s') + | urlstyle s' = RemoteUrl s' + | knownurl && s' == s = RemoteUrl s' + | otherwise = RemotePath s' -- insteadof config can rewrite remote location calcloc l | null insteadofs = l diff --git a/Git/Repair.hs b/Git/Repair.hs index 7d47f84..cea57df 100644 --- a/Git/Repair.hs +++ b/Git/Repair.hs @@ -30,6 +30,7 @@ import Git.Types import Git.Fsck import Git.Index import Git.Env +import Git.FilePath import qualified Git.Config as Config import qualified Git.Construct as Construct import qualified Git.LsTree as LsTree @@ -95,7 +96,7 @@ explodePacks r = go =<< listPackFiles r let dest = objectsDir r P. f createDirectoryIfMissing True (fromRawFilePath (parentDir dest)) - moveFile objfile (fromRawFilePath dest) + moveFile (toRawFilePath objfile) dest forM_ packs $ \packfile -> do let f = toRawFilePath packfile removeWhenExistsWith R.removeLink f @@ -103,7 +104,7 @@ explodePacks r = go =<< listPackFiles r return True {- Try to retrieve a set of missing objects, from the remotes of a - - repository. Returns any that could not be retreived. + - repository. Returns any that could not be retrieved. - - If another clone of the repository exists locally, which might not be a - remote of the repo being repaired, its path can be passed as a reference @@ -252,7 +253,8 @@ getAllRefs r = getAllRefs' (fromRawFilePath (localGitDir r) "refs") getAllRefs' :: FilePath -> IO [Ref] getAllRefs' refdir = do let topsegs = length (splitPath refdir) - 1 - let toref = Ref . encodeBS . joinPath . drop topsegs . splitPath + let toref = Ref . toInternalGitPath . encodeBS + . joinPath . drop topsegs . splitPath map toref <$> dirContentsRecursive refdir explodePackedRefsFile :: Repo -> IO () @@ -269,7 +271,7 @@ explodePackedRefsFile r = do let gitd = localGitDir r let dest = gitd P. fromRef' ref let dest' = fromRawFilePath dest - createDirectoryUnder gitd (parentDir dest) + createDirectoryUnder [gitd] (parentDir dest) unlessM (doesFileExist dest') $ writeFile dest' (fromRef sha) @@ -433,7 +435,7 @@ rewriteIndex r reinject (file, sha, mode, _) = case toTreeItemType mode of Nothing -> return Nothing Just treeitemtype -> Just <$> - UpdateIndex.stageFile sha treeitemtype (fromRawFilePath file) r + UpdateIndex.stageFile sha treeitemtype file r newtype GoodCommits = GoodCommits (S.Set Sha) diff --git a/Git/Sha.hs b/Git/Sha.hs index a66c34e..389bcc0 100644 --- a/Git/Sha.hs +++ b/Git/Sha.hs @@ -20,7 +20,7 @@ import Data.Char getSha :: String -> IO S.ByteString -> IO Sha getSha subcommand a = maybe bad return =<< extractSha <$> a where - bad = error $ "failed to read sha from git " ++ subcommand + bad = giveup $ "failed to read sha from git " ++ subcommand {- Extracts the Sha from a ByteString. - diff --git a/Git/Types.hs b/Git/Types.hs index 68045fc..ce1818e 100644 --- a/Git/Types.hs +++ b/Git/Types.hs @@ -51,6 +51,8 @@ data Repo = Repo , gitEnvOverridesGitDir :: Bool -- global options to pass to git when running git commands , gitGlobalOpts :: [CommandParam] + -- True only when --git-dir or GIT_DIR was used + , gitDirSpecifiedExplicitly :: Bool } deriving (Show, Eq, Ord) newtype ConfigKey = ConfigKey S.ByteString diff --git a/Git/UpdateIndex.hs b/Git/UpdateIndex.hs index 74816a6..f56bc86 100644 --- a/Git/UpdateIndex.hs +++ b/Git/UpdateIndex.hs @@ -1,6 +1,6 @@ {- git-update-index library - - - Copyright 2011-2020 Joey Hess + - Copyright 2011-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -99,15 +99,15 @@ updateIndexLine sha treeitemtype file = L.fromStrict $ <> "\t" <> indexPath file -stageFile :: Sha -> TreeItemType -> FilePath -> Repo -> IO Streamer +stageFile :: Sha -> TreeItemType -> RawFilePath -> Repo -> IO Streamer stageFile sha treeitemtype file repo = do - p <- toTopFilePath (toRawFilePath file) repo + p <- toTopFilePath file repo return $ pureStreamer $ updateIndexLine sha treeitemtype p {- A streamer that removes a file from the index. -} -unstageFile :: FilePath -> Repo -> IO Streamer +unstageFile :: RawFilePath -> Repo -> IO Streamer unstageFile file repo = do - p <- toTopFilePath (toRawFilePath file) repo + p <- toTopFilePath file repo return $ unstageFile' p unstageFile' :: TopFilePath -> Streamer @@ -135,9 +135,17 @@ stageDiffTreeItem d = case toTreeItemType (Diff.dstmode d) of indexPath :: TopFilePath -> InternalGitPath indexPath = toInternalGitPath . getTopFilePath -{- Refreshes the index, by checking file stat information. -} -refreshIndex :: Repo -> ((RawFilePath -> IO ()) -> IO ()) -> IO Bool -refreshIndex repo feeder = withCreateProcess p go +{- Refreshes the index, by checking file stat information. + - + - The action is passed a callback that it can use to send filenames to + - update-index. Sending Nothing will wait for update-index to finish + - updating the index. + -} +refreshIndex :: (MonadIO m, MonadMask m) => Repo -> ((Maybe RawFilePath -> IO ()) -> m ()) -> m () +refreshIndex repo feeder = bracket + (liftIO $ createProcess p) + (liftIO . cleanupProcess) + go where params = [ Param "update-index" @@ -150,10 +158,12 @@ refreshIndex repo feeder = withCreateProcess p go p = (gitCreateProcess params repo) { std_in = CreatePipe } - go (Just h) _ _ pid = do - feeder $ \f -> - S.hPut h (S.snoc f 0) - hFlush h - hClose h - checkSuccessProcess pid - go _ _ _ _ = error "internal" + go (Just h, _, _, pid) = do + let closer = do + hClose h + forceSuccessProcess p pid + feeder $ \case + Just f -> S.hPut h (S.snoc f 0) + Nothing -> closer + liftIO $ closer + go _ = error "internal" diff --git a/Utility/CopyFile.hs b/Utility/CopyFile.hs index 9c93e70..207153d 100644 --- a/Utility/CopyFile.hs +++ b/Utility/CopyFile.hs @@ -14,6 +14,7 @@ module Utility.CopyFile ( import Common import qualified BuildInfo +import qualified Utility.RawFilePath as R data CopyMetaData -- Copy timestamps when possible, but no other metadata, and @@ -60,9 +61,6 @@ copyFileExternal meta src dest = do - - The dest file must not exist yet, or it will fail to make a CoW copy, - and will return False. - - - - Note that in coreutil 9.0, cp uses CoW by default, without needing an - - option. This code is only needed to support older versions. -} copyCoW :: CopyMetaData -> FilePath -> FilePath -> IO Bool copyCoW meta src dest @@ -82,14 +80,17 @@ copyCoW meta src dest return ok | otherwise = return False where + -- Note that in coreutils 9.0, cp uses CoW by default, + -- without needing an option. This s only needed to support + -- older versions. params = Param "--reflink=always" : copyMetaDataParams meta {- Create a hard link if the filesystem allows it, and fall back to copying - the file. -} -createLinkOrCopy :: FilePath -> FilePath -> IO Bool +createLinkOrCopy :: RawFilePath -> RawFilePath -> IO Bool createLinkOrCopy src dest = go `catchIO` const fallback where go = do - createLink src dest + R.createLink src dest return True - fallback = copyFileExternal CopyAllMetaData src dest + fallback = copyFileExternal CopyAllMetaData (fromRawFilePath src) (fromRawFilePath dest) diff --git a/Utility/DataUnits.hs b/Utility/DataUnits.hs index a6c9ffc..8d910c6 100644 --- a/Utility/DataUnits.hs +++ b/Utility/DataUnits.hs @@ -1,6 +1,6 @@ {- data size display and parsing - - - Copyright 2011 Joey Hess + - Copyright 2011-2022 Joey Hess - - License: BSD-2-clause - @@ -21,14 +21,20 @@ - error. This was bad. - - So, a committee was formed. And it arrived at a committee-like decision, - - which satisfied noone, confused everyone, and made the world an uglier - - place. As with all committees, this was meh. + - which satisfied no one, confused everyone, and made the world an uglier + - place. As with all committees, this was meh. Or in this case, "mib". - - And the drive manufacturers happily continued selling drives that are - increasingly smaller than you'd expect, if you don't count on your - fingers. But that are increasingly too big for anyone to much notice. - This caused me to need git-annex. - + - Meanwhile, over in telecommunications land, they were using entirely + - different units that differ only in capitalization sometimes. + - (At one point this convinced me that it was a good idea to buy an ISDN + - line because 128 kb/s sounded really fast! But it was really only 128 + - kbit/s...) + - - Thus, I use units here that I loathe. Because if I didn't, people would - be confused that their drives seem the wrong size, and other people would - complain at me for not being standards compliant. And we call this @@ -38,7 +44,7 @@ module Utility.DataUnits ( dataUnits, storageUnits, - memoryUnits, + committeeUnits, bandwidthUnits, oldSchoolUnits, Unit(..), @@ -62,28 +68,30 @@ data Unit = Unit ByteSize Abbrev Name deriving (Ord, Show, Eq) dataUnits :: [Unit] -dataUnits = storageUnits ++ memoryUnits +dataUnits = storageUnits ++ committeeUnits ++ bandwidthUnits {- Storage units are (stupidly) powers of ten. -} storageUnits :: [Unit] storageUnits = - [ Unit (p 8) "YB" "yottabyte" + [ Unit (p 10) "QB" "quettabyte" + , Unit (p 9) "RB" "ronnabyte" + , Unit (p 8) "YB" "yottabyte" , Unit (p 7) "ZB" "zettabyte" , Unit (p 6) "EB" "exabyte" , Unit (p 5) "PB" "petabyte" , Unit (p 4) "TB" "terabyte" , Unit (p 3) "GB" "gigabyte" , Unit (p 2) "MB" "megabyte" - , Unit (p 1) "kB" "kilobyte" -- weird capitalization thanks to committe - , Unit (p 0) "B" "byte" + , Unit (p 1) "kB" "kilobyte" -- weird capitalization thanks to committee + , Unit 1 "B" "byte" ] where p :: Integer -> Integer p n = 1000^n -{- Memory units are (stupidly named) powers of 2. -} -memoryUnits :: [Unit] -memoryUnits = +{- Committee units are (stupidly named) powers of 2. -} +committeeUnits :: [Unit] +committeeUnits = [ Unit (p 8) "YiB" "yobibyte" , Unit (p 7) "ZiB" "zebibyte" , Unit (p 6) "EiB" "exbibyte" @@ -92,19 +100,37 @@ memoryUnits = , Unit (p 3) "GiB" "gibibyte" , Unit (p 2) "MiB" "mebibyte" , Unit (p 1) "KiB" "kibibyte" - , Unit (p 0) "B" "byte" + , Unit 1 "B" "byte" ] where p :: Integer -> Integer p n = 2^(n*10) -{- Bandwidth units are only measured in bits if you're some crazy telco. -} +{- Bandwidth units are (stupidly) measured in bits, not bytes, and are + - (also stupidly) powers of ten. + - + - While it's fairly common for "Mb", "Gb" etc to be used, that differs + - from "MB", "GB", etc only in case, and readSize is case-insensitive. + - So "Mbit", "Gbit" etc are used instead to avoid parsing ambiguity. + -} bandwidthUnits :: [Unit] -bandwidthUnits = error "stop trying to rip people off" +bandwidthUnits = + [ Unit (p 8) "Ybit" "yottabit" + , Unit (p 7) "Zbit" "zettabit" + , Unit (p 6) "Ebit" "exabit" + , Unit (p 5) "Pbit" "petabit" + , Unit (p 4) "Tbit" "terabit" + , Unit (p 3) "Gbit" "gigabit" + , Unit (p 2) "Mbit" "megabit" + , Unit (p 1) "kbit" "kilobit" -- weird capitalization thanks to committee + ] + where + p :: Integer -> Integer + p n = (1000^n) `div` 8 {- Do you yearn for the days when men were men and megabytes were megabytes? -} oldSchoolUnits :: [Unit] -oldSchoolUnits = zipWith (curry mingle) storageUnits memoryUnits +oldSchoolUnits = zipWith (curry mingle) storageUnits committeeUnits where mingle (Unit _ a n, Unit s' _ _) = Unit s' a n diff --git a/Utility/Directory.hs b/Utility/Directory.hs index 38adf17..a5c023f 100644 --- a/Utility/Directory.hs +++ b/Utility/Directory.hs @@ -16,7 +16,7 @@ module Utility.Directory ( import Control.Monad import System.FilePath -import System.PosixCompat.Files hiding (removeLink) +import System.PosixCompat.Files (isDirectory, isSymbolicLink) import Control.Applicative import System.IO.Unsafe (unsafeInterleaveIO) import Data.Maybe @@ -25,7 +25,8 @@ import Prelude import Utility.SystemDirectory import Utility.Exception import Utility.Monad -import Utility.Applicative +import Utility.FileSystemEncoding +import qualified Utility.RawFilePath as R dirCruft :: FilePath -> Bool dirCruft "." = True @@ -65,7 +66,7 @@ dirContentsRecursiveSkipping skipdir followsubdirsymlinks topdir = go [topdir] | otherwise = do let skip = collect (entry:files) dirs' entries let recurse = collect files (entry:dirs') entries - ms <- catchMaybeIO $ getSymbolicLinkStatus entry + ms <- catchMaybeIO $ R.getSymbolicLinkStatus (toRawFilePath entry) case ms of (Just s) | isDirectory s -> recurse @@ -87,9 +88,10 @@ dirTreeRecursiveSkipping skipdir topdir = go [] [topdir] | skipdir (takeFileName dir) = go c dirs | otherwise = unsafeInterleaveIO $ do subdirs <- go [] - =<< filterM (isDirectory <$$> getSymbolicLinkStatus) + =<< filterM isdir =<< catchDefaultIO [] (dirContents dir) go (subdirs++dir:c) dirs + isdir p = isDirectory <$> R.getSymbolicLinkStatus (toRawFilePath p) {- Use with an action that removes something, which may or may not exist. - diff --git a/Utility/Directory/Create.hs b/Utility/Directory/Create.hs index 32c0bcf..5650f96 100644 --- a/Utility/Directory/Create.hs +++ b/Utility/Directory/Create.hs @@ -31,10 +31,10 @@ import qualified Utility.RawFilePath as R import Utility.PartialPrelude {- Like createDirectoryIfMissing True, but it will only create - - missing parent directories up to but not including the directory - - in the first parameter. + - missing parent directories up to but not including a directory + - from the first parameter. - - - For example, createDirectoryUnder "/tmp/foo" "/tmp/foo/bar/baz" + - For example, createDirectoryUnder ["/tmp/foo"] "/tmp/foo/bar/baz" - will create /tmp/foo/bar if necessary, but if /tmp/foo does not exist, - it will throw an exception. - @@ -45,40 +45,43 @@ import Utility.PartialPrelude - FilePath (or the same as it), it will fail with an exception - even if the second FilePath's parent directory already exists. - - - Either or both of the FilePaths can be relative, or absolute. + - The FilePaths can be relative, or absolute. - They will be normalized as necessary. - - Note that, the second FilePath, if relative, is relative to the current - - working directory, not to the first FilePath. + - working directory. -} -createDirectoryUnder :: RawFilePath -> RawFilePath -> IO () -createDirectoryUnder topdir dir = - createDirectoryUnder' topdir dir R.createDirectory +createDirectoryUnder :: [RawFilePath] -> RawFilePath -> IO () +createDirectoryUnder topdirs dir = + createDirectoryUnder' topdirs dir R.createDirectory createDirectoryUnder' :: (MonadIO m, MonadCatch m) - => RawFilePath + => [RawFilePath] -> RawFilePath -> (RawFilePath -> m ()) -> m () -createDirectoryUnder' topdir dir0 mkdir = do - p <- liftIO $ relPathDirToFile topdir dir0 - let dirs = P.splitDirectories p - -- Catch cases where the dir is not beneath the topdir. +createDirectoryUnder' topdirs dir0 mkdir = do + relps <- liftIO $ forM topdirs $ \topdir -> relPathDirToFile topdir dir0 + let relparts = map P.splitDirectories relps + -- Catch cases where dir0 is not beneath a topdir. -- If the relative path between them starts with "..", -- it's not. And on Windows, if they are on different drives, -- the path will not be relative. - if headMaybe dirs == Just ".." || P.isAbsolute p - then liftIO $ ioError $ customerror userErrorType - ("createDirectoryFrom: not located in " ++ fromRawFilePath topdir) - -- If dir0 is the same as the topdir, don't try to create - -- it, but make sure it does exist. - else if null dirs - then liftIO $ unlessM (doesDirectoryExist (fromRawFilePath topdir)) $ - ioError $ customerror doesNotExistErrorType - "createDirectoryFrom: does not exist" - else createdirs $ - map (topdir P.) (reverse (scanl1 (P.) dirs)) + let notbeneath = \(_topdir, (relp, dirs)) -> + headMaybe dirs /= Just ".." && not (P.isAbsolute relp) + case filter notbeneath $ zip topdirs (zip relps relparts) of + ((topdir, (_relp, dirs)):_) + -- If dir0 is the same as the topdir, don't try to + -- create it, but make sure it does exist. + | null dirs -> + liftIO $ unlessM (doesDirectoryExist (fromRawFilePath topdir)) $ + ioError $ customerror doesNotExistErrorType $ + "createDirectoryFrom: " ++ fromRawFilePath topdir ++ " does not exist" + | otherwise -> createdirs $ + map (topdir P.) (reverse (scanl1 (P.) dirs)) + _ -> liftIO $ ioError $ customerror userErrorType + ("createDirectoryFrom: not located in " ++ unwords (map fromRawFilePath topdirs)) where customerror t s = mkIOError t s Nothing (Just (fromRawFilePath dir0)) diff --git a/Utility/Exception.hs b/Utility/Exception.hs index 4c60eac..cf55c5f 100644 --- a/Utility/Exception.hs +++ b/Utility/Exception.hs @@ -1,6 +1,6 @@ {- Simple IO exception handling (and some more) - - - Copyright 2011-2016 Joey Hess + - Copyright 2011-2023 Joey Hess - - License: BSD-2-clause -} @@ -20,6 +20,7 @@ module Utility.Exception ( bracketIO, catchNonAsync, tryNonAsync, + nonAsyncHandler, tryWhenExists, catchIOErrorType, IOErrorType(..), @@ -28,21 +29,24 @@ module Utility.Exception ( import Control.Monad.Catch as X hiding (Handler) import qualified Control.Monad.Catch as M -import Control.Exception (IOException, AsyncException) -import Control.Exception (SomeAsyncException) +import Control.Exception (IOException, AsyncException, SomeAsyncException) import Control.Monad import Control.Monad.IO.Class (liftIO, MonadIO) import System.IO.Error (isDoesNotExistError, ioeGetErrorType) import GHC.IO.Exception (IOErrorType(..)) import Utility.Data +import Utility.SafeOutput {- Like error, this throws an exception. Unlike error, if this exception - is not caught, it won't generate a backtrace. So use this for situations - where there's a problem that the user is expected to see in some - - circumstances. -} + - circumstances. + - + - Also, control characters are filtered out of the message. + -} giveup :: [Char] -> a -giveup = errorWithoutStackTrace +giveup = errorWithoutStackTrace . safeOutput {- Catches IO errors and returns a Bool -} catchBoolIO :: MonadCatch m => m Bool -> m Bool @@ -81,11 +85,7 @@ bracketIO setup cleanup = bracket (liftIO setup) (liftIO . cleanup) - ThreadKilled and UserInterrupt get through. -} catchNonAsync :: MonadCatch m => m a -> (SomeException -> m a) -> m a -catchNonAsync a onerr = a `catches` - [ M.Handler (\ (e :: AsyncException) -> throwM e) - , M.Handler (\ (e :: SomeAsyncException) -> throwM e) - , M.Handler (\ (e :: SomeException) -> onerr e) - ] +catchNonAsync a onerr = a `catches` (nonAsyncHandler onerr) tryNonAsync :: MonadCatch m => m a -> m (Either SomeException a) tryNonAsync a = go `catchNonAsync` (return . Left) @@ -94,6 +94,13 @@ tryNonAsync a = go `catchNonAsync` (return . Left) v <- a return (Right v) +nonAsyncHandler :: MonadCatch m => (SomeException -> m a) -> [M.Handler m a] +nonAsyncHandler onerr = + [ M.Handler (\ (e :: AsyncException) -> throwM e) + , M.Handler (\ (e :: SomeAsyncException) -> throwM e) + , M.Handler (\ (e :: SomeException) -> onerr e) + ] + {- Catches only DoesNotExist exceptions, and lets all others through. -} tryWhenExists :: MonadCatch m => m a -> m (Maybe a) tryWhenExists a = do diff --git a/Utility/FileMode.hs b/Utility/FileMode.hs index 6725601..ecc19d8 100644 --- a/Utility/FileMode.hs +++ b/Utility/FileMode.hs @@ -1,6 +1,6 @@ {- File mode utilities. - - - Copyright 2010-2020 Joey Hess + - Copyright 2010-2023 Joey Hess - - License: BSD-2-clause -} @@ -16,7 +16,10 @@ module Utility.FileMode ( import System.IO import Control.Monad import System.PosixCompat.Types -import System.PosixCompat.Files hiding (removeLink) +import System.PosixCompat.Files (unionFileModes, intersectFileModes, stdFileMode, nullFileMode, groupReadMode, ownerReadMode, ownerWriteMode, ownerExecuteMode, groupWriteMode, groupExecuteMode, otherReadMode, otherWriteMode, otherExecuteMode, fileMode) +#ifndef mingw32_HOST_OS +import System.PosixCompat.Files (setFileCreationMask) +#endif import Control.Monad.IO.Class import Foreign (complement) import Control.Monad.Catch @@ -100,16 +103,19 @@ checkMode checkfor mode = checkfor `intersectFileModes` mode == checkfor isExecutable :: FileMode -> Bool isExecutable mode = combineModes executeModes `intersectFileModes` mode /= 0 -{- Runs an action without that pesky umask influencing it, unless the - - passed FileMode is the standard one. -} -noUmask :: (MonadIO m, MonadMask m) => FileMode -> m a -> m a -#ifndef mingw32_HOST_OS -noUmask mode a - | mode == stdFileMode = a - | otherwise = withUmask nullFileMode a -#else -noUmask _ a = a -#endif +data ModeSetter = ModeSetter FileMode (RawFilePath -> IO ()) + +{- Runs an action which should create the file, passing it the desired + - initial file mode. Then runs the ModeSetter's action on the file, which + - can adjust the initial mode if umask prevented the file from being + - created with the right mode. -} +applyModeSetter :: Maybe ModeSetter -> RawFilePath -> (Maybe FileMode -> IO a) -> IO a +applyModeSetter (Just (ModeSetter mode modeaction)) file a = do + r <- a (Just mode) + void $ tryIO $ modeaction file + return r +applyModeSetter Nothing _ a = + a Nothing withUmask :: (MonadIO m, MonadMask m) => FileMode -> m a -> m a #ifndef mingw32_HOST_OS @@ -169,10 +175,10 @@ writeFileProtected file content = writeFileProtected' file (\h -> hPutStr h content) writeFileProtected' :: RawFilePath -> (Handle -> IO ()) -> IO () -writeFileProtected' file writer = protectedOutput $ - withFile (fromRawFilePath file) WriteMode $ \h -> do - void $ tryIO $ modifyFileMode file $ removeModes otherGroupModes - writer h +writeFileProtected' file writer = do + h <- protectedOutput $ openFile (fromRawFilePath file) WriteMode + void $ tryIO $ modifyFileMode file $ removeModes otherGroupModes + writer h protectedOutput :: IO a -> IO a protectedOutput = withUmask 0o0077 diff --git a/Utility/FileSize.hs b/Utility/FileSize.hs index a503fda..3d216f2 100644 --- a/Utility/FileSize.hs +++ b/Utility/FileSize.hs @@ -14,13 +14,15 @@ module Utility.FileSize ( getFileSize', ) where -import System.PosixCompat.Files hiding (removeLink) -import qualified Utility.RawFilePath as R #ifdef mingw32_HOST_OS import Control.Exception (bracket) import System.IO import Utility.FileSystemEncoding +#else +import System.PosixCompat.Files (fileSize) #endif +import System.PosixCompat.Files (FileStatus) +import qualified Utility.RawFilePath as R type FileSize = Integer diff --git a/Utility/Format.hs b/Utility/Format.hs index 466988c..930b7ee 100644 --- a/Utility/Format.hs +++ b/Utility/Format.hs @@ -1,6 +1,6 @@ {- Formatted string handling. - - - Copyright 2010-2020 Joey Hess + - Copyright 2010-2023 Joey Hess - - License: BSD-2-clause -} @@ -9,10 +9,12 @@ module Utility.Format ( Format, gen, format, + escapedFormat, formatContainsVar, decode_c, encode_c, encode_c', + isUtf8Byte, prop_encode_c_decode_c_roundtrip ) where @@ -21,12 +23,11 @@ import Data.Char (isAlphaNum, isOctDigit, isHexDigit, isSpace, chr, ord, isAscii import Data.Maybe (fromMaybe) import Data.Word (Word8) import Data.List (isPrefixOf) -import qualified Codec.Binary.UTF8.String import qualified Data.Map as M +import qualified Data.ByteString as S import Utility.PartialPrelude - -type FormatString = String +import Utility.FileSystemEncoding {- A format consists of a list of fragments. -} type Format = [Frag] @@ -53,7 +54,8 @@ format f vars = concatMap expand f where expand (Const s) = s expand (Var name j esc) - | esc = justify j $ encode_c' isSpace $ getvar name + | esc = justify j $ decodeBS $ escapedFormat $ + encodeBS $ getvar name | otherwise = justify j $ getvar name getvar name = fromMaybe "" $ M.lookup name vars justify UnJustified s = s @@ -62,6 +64,13 @@ format f vars = concatMap expand f pad i s = take (i - length s) spaces spaces = repeat ' ' +escapedFormat :: S.ByteString -> S.ByteString +escapedFormat = encode_c needescape + where + needescape c = isUtf8Byte c || + isSpace (chr (fromIntegral c)) || + c == fromIntegral (ord '"') + {- Generates a Format that can be used to expand variables in a - format string, such as "${foo} ${bar;10} ${baz;-10}\n" - @@ -69,8 +78,8 @@ format f vars = concatMap expand f - - Also, "${escaped_foo}" will apply encode_c to the value of variable foo. -} -gen :: FormatString -> Format -gen = filter (not . empty) . fuse [] . scan [] . decode_c +gen :: String -> Format +gen = filter (not . empty) . fuse [] . scan [] . decodeBS . decode_c . encodeBS where -- The Format is built up in reverse, for efficiency, -- and can have many adjacent Consts. Fusing it fixes both @@ -122,33 +131,50 @@ formatContainsVar v = any go {- Decodes a C-style encoding, where \n is a newline (etc), - \NNN is an octal encoded character, and \xNN is a hex encoded character. -} -decode_c :: FormatString -> String -decode_c [] = [] -decode_c s = unescape ("", s) +decode_c :: S.ByteString -> S.ByteString +decode_c s + | S.null s = S.empty + | otherwise = unescape (S.empty, s) where - e = '\\' - unescape (b, []) = b - -- look for escapes starting with '\' - unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair) + e = fromIntegral (ord '\\') + x = fromIntegral (ord 'x') + isescape c = c == e + unescape (b, v) + | S.null v = b + | otherwise = b <> fst pair <> unescape (handle $ snd pair) where - pair = span (/= e) v - isescape x = x == e - handle (x:'x':n1:n2:rest) - | isescape x && allhex = (fromhex, rest) + pair = S.span (not . isescape) v + handle b + | S.length b >= 1 && isescape (S.index b 0) = handle' b + | otherwise = (S.empty, b) + + handle' b + | S.length b >= 4 + && S.index b 1 == x + && allhex = (fromhex, rest) where + n1 = chr (fromIntegral (S.index b 2)) + n2 = chr (fromIntegral (S.index b 3)) + rest = S.drop 4 b allhex = isHexDigit n1 && isHexDigit n2 - fromhex = [chr $ readhex [n1, n2]] + fromhex = encodeBS [chr $ readhex [n1, n2]] readhex h = Prelude.read $ "0x" ++ h :: Int - handle (x:n1:n2:n3:rest) - | isescape x && alloctal = (fromoctal, rest) + handle' b + | S.length b >= 4 && alloctal = (fromoctal, rest) where + n1 = chr (fromIntegral (S.index b 1)) + n2 = chr (fromIntegral (S.index b 2)) + n3 = chr (fromIntegral (S.index b 3)) + rest = S.drop 4 b alloctal = isOctDigit n1 && isOctDigit n2 && isOctDigit n3 - fromoctal = [chr $ readoctal [n1, n2, n3]] + fromoctal = encodeBS [chr $ readoctal [n1, n2, n3]] readoctal o = Prelude.read $ "0o" ++ o :: Int - -- \C is used for a few special characters - handle (x:nc:rest) - | isescape x = ([echar nc], rest) + handle' b + | S.length b >= 2 = + (S.singleton (fromIntegral (ord (echar nc))), rest) where + nc = chr (fromIntegral (S.index b 1)) + rest = S.drop 2 b echar 'a' = '\a' echar 'b' = '\b' echar 'f' = '\f' @@ -156,38 +182,50 @@ decode_c s = unescape ("", s) echar 'r' = '\r' echar 't' = '\t' echar 'v' = '\v' - echar a = a - handle n = ("", n) - -{- Inverse of decode_c. -} -encode_c :: String -> FormatString -encode_c = encode_c' (const False) + echar a = a -- \\ decodes to '\', and \" to '"' + handle' b = (S.empty, b) -{- Encodes special characters, as well as any matching the predicate. -} -encode_c' :: (Char -> Bool) -> String -> FormatString -encode_c' p = concatMap echar +{- Inverse of decode_c. Encodes ascii control characters as well as + - bytes that match the predicate. (And also '\' itself.) + -} +encode_c :: (Word8 -> Bool) -> S.ByteString -> S.ByteString +encode_c p s = fromMaybe s (encode_c' p s) + +{- Returns Nothing when nothing needs to be escaped in the input ByteString. -} +encode_c' :: (Word8 -> Bool) -> S.ByteString -> Maybe S.ByteString +encode_c' p s + | S.any needencode s = Just (S.concatMap echar s) + | otherwise = Nothing where - e c = '\\' : [c] - echar '\a' = e 'a' - echar '\b' = e 'b' - echar '\f' = e 'f' - echar '\n' = e 'n' - echar '\r' = e 'r' - echar '\t' = e 't' - echar '\v' = e 'v' - echar '\\' = e '\\' - echar '"' = e '"' + e = fromIntegral (ord '\\') + q = fromIntegral (ord '"') + del = 0x7F + iscontrol c = c < 0x20 + + echar 0x7 = ec 'a' + echar 0x8 = ec 'b' + echar 0x0C = ec 'f' + echar 0x0A = ec 'n' + echar 0x0D = ec 'r' + echar 0x09 = ec 't' + echar 0x0B = ec 'v' echar c - | ord c < 0x20 = e_asc c -- low ascii - | ord c >= 256 = e_utf c -- unicode - | ord c > 0x7E = e_asc c -- high ascii - | p c = e_asc c - | otherwise = [c] - -- unicode character is decomposed to individual Word8s, - -- and each is shown in octal - e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8]) - e_asc c = showoctal $ ord c - showoctal i = '\\' : printf "%03o" i + | iscontrol c = showoctal c -- other control characters + | c == e = ec '\\' -- escape the escape character itself + | c == del = showoctal c + | p c = if c == q + then ec '"' -- escape double quote + else showoctal c + | otherwise = S.singleton c + + needencode c = iscontrol c || c == e || c == del || p c + + ec c = S.pack [e, fromIntegral (ord c)] + + showoctal i = encodeBS ('\\' : printf "%03o" i) + +isUtf8Byte :: Word8 -> Bool +isUtf8Byte c = c >= 0x80 {- For quickcheck. - @@ -198,6 +236,7 @@ encode_c' p = concatMap echar - This property papers over the problem, by only testing ascii. -} prop_encode_c_decode_c_roundtrip :: String -> Bool -prop_encode_c_decode_c_roundtrip s = s' == decode_c (encode_c s') +prop_encode_c_decode_c_roundtrip s = s' == + decodeBS (decode_c (encode_c isUtf8Byte (encodeBS s'))) where s' = filter isAscii s diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs index b697ab3..3828bc6 100644 --- a/Utility/InodeCache.hs +++ b/Utility/InodeCache.hs @@ -32,6 +32,7 @@ module Utility.InodeCache ( inodeCacheToMtime, inodeCacheToEpochTime, inodeCacheEpochTimeRange, + replaceInode, SentinalFile(..), SentinalStatus(..), @@ -50,11 +51,10 @@ import Utility.QuickCheck import qualified Utility.RawFilePath as R import System.PosixCompat.Types +import System.PosixCompat.Files (isRegularFile, fileID) import Data.Time.Clock.POSIX -#ifdef mingw32_HOST_OS -import Data.Word (Word64) -#else +#ifndef mingw32_HOST_OS import qualified System.Posix.Files as Posix #endif @@ -125,7 +125,11 @@ inodeCacheEpochTimeRange i = let t = inodeCacheToEpochTime i in (t-1, t+1) -{- For backwards compatability, support low-res mtime with no +replaceInode :: FileID -> InodeCache -> InodeCache +replaceInode inode (InodeCache (InodeCachePrim _ sz mtime)) = + InodeCache (InodeCachePrim inode sz mtime) + +{- For backwards compatibility, support low-res mtime with no - fractional seconds. -} data MTime = MTimeLowRes EpochTime | MTimeHighRes POSIXTime deriving (Show, Ord) @@ -187,7 +191,7 @@ readInodeCache s = case words s of genInodeCache :: RawFilePath -> TSDelta -> IO (Maybe InodeCache) genInodeCache f delta = catchDefaultIO Nothing $ - toInodeCache delta f =<< R.getFileStatus f + toInodeCache delta f =<< R.getSymbolicLinkStatus f toInodeCache :: TSDelta -> RawFilePath -> FileStatus -> IO (Maybe InodeCache) toInodeCache d f s = toInodeCache' d f s (fileID s) @@ -243,7 +247,7 @@ data SentinalStatus = SentinalStatus - On Windows, time stamp differences are ignored, since they change - with the timezone. - - - When the sential file does not exist, InodeCaches canot reliably be + - When the sential file does not exist, InodeCaches cannot reliably be - compared, so the assumption is that there is has been a change. -} checkSentinalFile :: SentinalFile -> IO SentinalStatus diff --git a/Utility/Metered.hs b/Utility/Metered.hs index 8fd9c9b..a8a7111 100644 --- a/Utility/Metered.hs +++ b/Utility/Metered.hs @@ -53,6 +53,7 @@ import Utility.DataUnits import Utility.HumanTime import Utility.SimpleProtocol as Proto import Utility.ThreadScheduler +import Utility.SafeOutput import qualified Data.ByteString.Lazy as L import qualified Data.ByteString as S @@ -321,7 +322,7 @@ demeterCommandEnv oh cmd params environ = do where stdouthandler l = unless (quietMode oh) $ - putStrLn l + putStrLn (safeOutput l) {- To suppress progress output, while displaying other messages, - filter out lines that contain \r (typically used to reset to the @@ -491,14 +492,14 @@ bandwidthMeter mtotalsize (MeterState (BytesProcessed old) before) (MeterState ( , estimatedcompletion ] where - amount = roughSize' memoryUnits True 2 new + amount = roughSize' committeeUnits True 2 new percentamount = case mtotalsize of Just (TotalSize totalsize) -> let p = showPercentage 0 $ percentage totalsize (min new totalsize) in p ++ replicate (6 - length p) ' ' ++ amount Nothing -> amount - rate = roughSize' memoryUnits True 0 bytespersecond ++ "/s" + rate = roughSize' committeeUnits True 0 bytespersecond ++ "/s" bytespersecond | duration == 0 = fromIntegral transferred | otherwise = floor $ fromIntegral transferred / duration diff --git a/Utility/Misc.hs b/Utility/Misc.hs index 01ae178..3cf5275 100644 --- a/Utility/Misc.hs +++ b/Utility/Misc.hs @@ -12,6 +12,7 @@ module Utility.Misc ( readFileStrict, separate, separate', + separateEnd', firstLine, firstLine', segment, @@ -62,6 +63,13 @@ separate' c l = unbreak $ S.break c l | S.null b = r | otherwise = (a, S.tail b) +separateEnd' :: (Word8 -> Bool) -> S.ByteString -> (S.ByteString, S.ByteString) +separateEnd' c l = unbreak $ S.breakEnd c l + where + unbreak r@(a, b) + | S.null a = r + | otherwise = (S.init a, b) + {- Breaks out the first line. -} firstLine :: String -> String firstLine = takeWhile (/= '\n') @@ -86,7 +94,7 @@ prop_segment_regressionTest :: Bool prop_segment_regressionTest = all id -- Even an empty list is a segment. [ segment (== "--") [] == [[]] - -- There are two segements in this list, even though the first is empty. + -- There are two segments in this list, even though the first is empty. , segment (== "--") ["--", "foo", "bar"] == [[],["foo","bar"]] ] diff --git a/Utility/Monad.hs b/Utility/Monad.hs index abe06f3..6cd2c5e 100644 --- a/Utility/Monad.hs +++ b/Utility/Monad.hs @@ -12,6 +12,7 @@ module Utility.Monad ( getM, anyM, allM, + partitionM, untilTrue, ifM, (<||>), @@ -45,6 +46,13 @@ allM :: Monad m => (a -> m Bool) -> [a] -> m Bool allM _ [] = return True allM p (x:xs) = p x <&&> allM p xs +partitionM :: Monad m => (a -> m Bool) -> [a] -> m ([a], [a]) +partitionM _ [] = return ([], []) +partitionM p (x:xs) = do + r <- p x + (as, bs) <- partitionM p xs + return $ if r then (x:as, bs) else (as, x:bs) + {- Runs an action on values from a list until it succeeds. -} untilTrue :: Monad m => [a] -> (a -> m Bool) -> m Bool untilTrue = flip anyM diff --git a/Utility/MoveFile.hs b/Utility/MoveFile.hs index 3ea17e8..6481b29 100644 --- a/Utility/MoveFile.hs +++ b/Utility/MoveFile.hs @@ -14,12 +14,11 @@ module Utility.MoveFile ( ) where import Control.Monad -import System.FilePath -import System.PosixCompat.Files hiding (removeLink) import System.IO.Error import Prelude #ifndef mingw32_HOST_OS +import System.PosixCompat.Files (isDirectory) import Control.Monad.IfElse import Utility.SafeCommand #endif @@ -28,17 +27,19 @@ import Utility.SystemDirectory import Utility.Tmp import Utility.Exception import Utility.Monad +import Utility.FileSystemEncoding +import qualified Utility.RawFilePath as R {- Moves one filename to another. - First tries a rename, but falls back to moving across devices if needed. -} -moveFile :: FilePath -> FilePath -> IO () -moveFile src dest = tryIO (rename src dest) >>= onrename +moveFile :: RawFilePath -> RawFilePath -> IO () +moveFile src dest = tryIO (R.rename src dest) >>= onrename where onrename (Right _) = noop onrename (Left e) | isPermissionError e = rethrow | isDoesNotExistError e = rethrow - | otherwise = viaTmp mv dest () + | otherwise = viaTmp mv (fromRawFilePath dest) () where rethrow = throwM e @@ -46,16 +47,20 @@ moveFile src dest = tryIO (rename src dest) >>= onrename -- copyFile is likely not as optimised as -- the mv command, so we'll use the command. -- - -- But, while Windows has a "mv", it does not seem very - -- reliable, so use copyFile there. + -- But, while Windows has a "mv", it does not + -- seem very reliable, so use copyFile there. #ifndef mingw32_HOST_OS -- If dest is a directory, mv would move the file -- into it, which is not desired. whenM (isdir dest) rethrow - ok <- boolSystem "mv" [Param "-f", Param src, Param tmp] + ok <- boolSystem "mv" + [ Param "-f" + , Param (fromRawFilePath src) + , Param tmp + ] let e' = e #else - r <- tryIO $ copyFile src tmp + r <- tryIO $ copyFile (fromRawFilePath src) tmp let (ok, e') = case r of Left err -> (False, err) Right _ -> (True, e) @@ -67,7 +72,7 @@ moveFile src dest = tryIO (rename src dest) >>= onrename #ifndef mingw32_HOST_OS isdir f = do - r <- tryIO $ getFileStatus f + r <- tryIO $ R.getSymbolicLinkStatus f case r of (Left _) -> return False (Right s) -> return $ isDirectory s diff --git a/Utility/Path.hs b/Utility/Path.hs index b5aeb16..64ef076 100644 --- a/Utility/Path.hs +++ b/Utility/Path.hs @@ -20,6 +20,7 @@ module Utility.Path ( runSegmentPaths', dotfile, splitShortExtensions, + splitShortExtensions', relPathDirToFileAbs, inSearchPath, searchPath, @@ -53,7 +54,7 @@ import Utility.FileSystemEncoding - - This does not guarantee that two paths that refer to the same location, - and are both relative to the same location (or both absolute) will - - yeild the same result. Run both through normalise from System.RawFilePath + - yield the same result. Run both through normalise from System.RawFilePath - to ensure that. -} simplifyPath :: RawFilePath -> RawFilePath @@ -90,7 +91,7 @@ upFrom dir {- Checks if the first RawFilePath is, or could be said to contain the second. - For example, "foo/" contains "foo/bar". Also, "foo", "./foo", "foo/" etc - - are all equivilant. + - are all equivalent. -} dirContains :: RawFilePath -> RawFilePath -> Bool dirContains a b = a == b diff --git a/Utility/Path/AbsRel.hs b/Utility/Path/AbsRel.hs index 857dd5e..4007fbb 100644 --- a/Utility/Path/AbsRel.hs +++ b/Utility/Path/AbsRel.hs @@ -37,7 +37,7 @@ import Utility.FileSystemEncoding - Also simplifies it using simplifyPath. - - The first parameter is a base directory (ie, the cwd) to use if the path - - is not already absolute, and should itsef be absolute. + - is not already absolute, and should itself be absolute. - - Does not attempt to deal with edge cases or ensure security with - untrusted inputs. diff --git a/Utility/Process.hs b/Utility/Process.hs index 4cf6105..07f035d 100644 --- a/Utility/Process.hs +++ b/Utility/Process.hs @@ -31,6 +31,7 @@ module Utility.Process ( stdoutHandle, stderrHandle, processHandle, + showCmd, devNull, ) where @@ -188,11 +189,13 @@ withCreateProcess p action = bracket (createProcess p) cleanupProcess debugProcess :: CreateProcess -> ProcessHandle -> IO () debugProcess p h = do pid <- getPid h - debug "Utility.Process" $ unwords + debug "Utility.Process" $ unwords $ [ describePid pid , action ++ ":" , showCmd p - ] + ] ++ case cwd p of + Nothing -> [] + Just c -> ["in", show c] where action | piped (std_in p) && piped (std_out p) = "chat" diff --git a/Utility/Process/Transcript.hs b/Utility/Process/Transcript.hs new file mode 100644 index 0000000..7bf94ff --- /dev/null +++ b/Utility/Process/Transcript.hs @@ -0,0 +1,97 @@ +{- Process transcript + - + - Copyright 2012-2020 Joey Hess + - + - License: BSD-2-clause + -} + +{-# LANGUAGE CPP #-} +{-# OPTIONS_GHC -fno-warn-tabs #-} + +module Utility.Process.Transcript ( + processTranscript, + processTranscript', + processTranscript'', +) where + +import Utility.Process + +import System.IO +import System.Exit +import Control.Concurrent.Async +import Control.Monad +#ifndef mingw32_HOST_OS +import Control.Exception +import qualified System.Posix.IO +#else +import Control.Applicative +#endif +import Data.Maybe +import Prelude + +-- | Runs a process and returns a transcript combining its stdout and +-- stderr, and whether it succeeded or failed. +processTranscript :: String -> [String] -> (Maybe String) -> IO (String, Bool) +processTranscript cmd opts = processTranscript' (proc cmd opts) + +-- | Also feeds the process some input. +processTranscript' :: CreateProcess -> Maybe String -> IO (String, Bool) +processTranscript' cp input = do + (t, c) <- processTranscript'' cp input + return (t, c == ExitSuccess) + +processTranscript'' :: CreateProcess -> Maybe String -> IO (String, ExitCode) +processTranscript'' cp input = do +#ifndef mingw32_HOST_OS +{- This implementation interleves stdout and stderr in exactly the order + - the process writes them. -} + let setup = do + (readf, writef) <- System.Posix.IO.createPipe + System.Posix.IO.setFdOption readf System.Posix.IO.CloseOnExec True + System.Posix.IO.setFdOption writef System.Posix.IO.CloseOnExec True + readh <- System.Posix.IO.fdToHandle readf + writeh <- System.Posix.IO.fdToHandle writef + return (readh, writeh) + let cleanup (readh, writeh) = do + hClose readh + hClose writeh + bracket setup cleanup $ \(readh, writeh) -> do + let cp' = cp + { std_in = if isJust input then CreatePipe else Inherit + , std_out = UseHandle writeh + , std_err = UseHandle writeh + } + withCreateProcess cp' $ \hin hout herr pid -> do + get <- asyncreader pid readh + writeinput input (hin, hout, herr, pid) + code <- waitForProcess pid + transcript <- wait get + return (transcript, code) +#else +{- This implementation for Windows puts stderr after stdout. -} + let cp' = cp + { std_in = if isJust input then CreatePipe else Inherit + , std_out = CreatePipe + , std_err = CreatePipe + } + withCreateProcess cp' $ \hin hout herr pid -> do + let p = (hin, hout, herr, pid) + getout <- asyncreader pid (stdoutHandle p) + geterr <- asyncreader pid (stderrHandle p) + writeinput input p + code <- waitForProcess pid + transcript <- (++) <$> wait getout <*> wait geterr + return (transcript, code) +#endif + where + asyncreader pid h = async $ reader pid h [] + reader pid h c = hGetLineUntilExitOrEOF pid h >>= \case + Nothing -> return (unlines (reverse c)) + Just l -> reader pid h (l:c) + writeinput (Just s) p = do + let inh = stdinHandle p + unless (null s) $ do + hPutStr inh s + hFlush inh + hClose inh + writeinput Nothing _ = return () diff --git a/Utility/QuickCheck.hs b/Utility/QuickCheck.hs index 650f559..96e31d5 100644 --- a/Utility/QuickCheck.hs +++ b/Utility/QuickCheck.hs @@ -6,6 +6,7 @@ -} {-# OPTIONS_GHC -fno-warn-orphans #-} +{-# OPTIONS_GHC -fno-warn-tabs #-} {-# LANGUAGE TypeSynonymInstances #-} module Utility.QuickCheck diff --git a/Utility/RawFilePath.hs b/Utility/RawFilePath.hs index f32b226..b39423d 100644 --- a/Utility/RawFilePath.hs +++ b/Utility/RawFilePath.hs @@ -5,9 +5,11 @@ - - On Windows, filenames are in unicode, so RawFilePaths have to be - decoded. So this library will work, but less efficiently than using - - FilePath would. + - FilePath would. However, this library also takes care to support long + - filenames on Windows, by either using other libraries that do, or by + - doing UNC-style conversion itself. - - - Copyright 2019-2020 Joey Hess + - Copyright 2019-2023 Joey Hess - - License: BSD-2-clause -} @@ -27,6 +29,10 @@ module Utility.RawFilePath ( getCurrentDirectory, createDirectory, setFileMode, + setOwnerAndGroup, + rename, + createNamedPipe, + fileAccess, ) where #ifndef mingw32_HOST_OS @@ -47,23 +53,28 @@ createDirectory p = D.createDirectory p 0o777 #else import System.PosixCompat (FileStatus, FileMode) +-- System.PosixCompat does not handle UNC-style conversion itself, +-- so all uses of it library have to be pre-converted below. See +-- https://github.com/jacobstanley/unix-compat/issues/56 import qualified System.PosixCompat as P -import qualified System.PosixCompat.Files as F import qualified System.Directory as D import Utility.FileSystemEncoding +import Utility.Path.Windows readSymbolicLink :: RawFilePath -> IO RawFilePath readSymbolicLink f = toRawFilePath <$> P.readSymbolicLink (fromRawFilePath f) createSymbolicLink :: RawFilePath -> RawFilePath -> IO () -createSymbolicLink a b = P.createSymbolicLink - (fromRawFilePath a) - (fromRawFilePath b) +createSymbolicLink a b = do + a' <- fromRawFilePath <$> convertToWindowsNativeNamespace a + b' <- fromRawFilePath <$> convertToWindowsNativeNamespace b + P.createSymbolicLink a' b' createLink :: RawFilePath -> RawFilePath -> IO () -createLink a b = P.createLink - (fromRawFilePath a) - (fromRawFilePath b) +createLink a b = do + a' <- fromRawFilePath <$> convertToWindowsNativeNamespace a + b' <- fromRawFilePath <$> convertToWindowsNativeNamespace b + P.createLink a' b' {- On windows, removeLink is not available, so only remove files, - not symbolic links. -} @@ -71,10 +82,12 @@ removeLink :: RawFilePath -> IO () removeLink = D.removeFile . fromRawFilePath getFileStatus :: RawFilePath -> IO FileStatus -getFileStatus = P.getFileStatus . fromRawFilePath +getFileStatus p = P.getFileStatus . fromRawFilePath + =<< convertToWindowsNativeNamespace p getSymbolicLinkStatus :: RawFilePath -> IO FileStatus -getSymbolicLinkStatus = P.getSymbolicLinkStatus . fromRawFilePath +getSymbolicLinkStatus p = P.getSymbolicLinkStatus . fromRawFilePath + =<< convertToWindowsNativeNamespace p doesPathExist :: RawFilePath -> IO Bool doesPathExist = D.doesPathExist . fromRawFilePath @@ -86,5 +99,27 @@ createDirectory :: RawFilePath -> IO () createDirectory = D.createDirectory . fromRawFilePath setFileMode :: RawFilePath -> FileMode -> IO () -setFileMode = F.setFileMode . fromRawFilePath +setFileMode p m = do + p' <- fromRawFilePath <$> convertToWindowsNativeNamespace p + P.setFileMode p' m + +{- Using renamePath rather than the rename provided in unix-compat + - because of this bug https://github.com/jacobstanley/unix-compat/issues/56-} +rename :: RawFilePath -> RawFilePath -> IO () +rename a b = D.renamePath (fromRawFilePath a) (fromRawFilePath b) + +setOwnerAndGroup :: RawFilePath -> P.UserID -> P.GroupID -> IO () +setOwnerAndGroup p u g = do + p' <- fromRawFilePath <$> convertToWindowsNativeNamespace p + P.setOwnerAndGroup p' u g + +createNamedPipe :: RawFilePath -> FileMode -> IO () +createNamedPipe p m = do + p' <- fromRawFilePath <$> convertToWindowsNativeNamespace p + P.createNamedPipe p' m + +fileAccess :: RawFilePath -> Bool -> Bool -> Bool -> IO Bool +fileAccess p a b c = do + p' <- fromRawFilePath <$> convertToWindowsNativeNamespace p + P.fileAccess p' a b c #endif diff --git a/Utility/SafeOutput.hs b/Utility/SafeOutput.hs new file mode 100644 index 0000000..d781386 --- /dev/null +++ b/Utility/SafeOutput.hs @@ -0,0 +1,36 @@ +{- Safe output to the terminal of possibly attacker-controlled strings, + - avoiding displaying control characters. + - + - Copyright 2023 Joey Hess + - + - License: BSD-2-clause + -} + +{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, CPP #-} +{-# OPTIONS_GHC -fno-warn-tabs #-} + +module Utility.SafeOutput ( + safeOutput, + safeOutputChar, +) where + +import Data.Char +import qualified Data.ByteString as S + +class SafeOutputtable t where + safeOutput :: t -> t + +instance SafeOutputtable String where + safeOutput = filter safeOutputChar + +instance SafeOutputtable S.ByteString where + safeOutput = S.filter (safeOutputChar . chr . fromIntegral) + +safeOutputChar :: Char -> Bool +safeOutputChar c + | not (isControl c) = True + | c == '\n' = True + | c == '\t' = True + | c == '\DEL' = False + | ord c > 31 = True + | otherwise = False diff --git a/Utility/SystemDirectory.hs b/Utility/SystemDirectory.hs index b9040fe..a7d60f9 100644 --- a/Utility/SystemDirectory.hs +++ b/Utility/SystemDirectory.hs @@ -1,4 +1,4 @@ -{- System.Directory without its conflicting isSymbolicLink +{- System.Directory without its conflicting isSymbolicLink and getFileSize. - - Copyright 2016 Joey Hess - diff --git a/Utility/Tmp.hs b/Utility/Tmp.hs index 92bd921..efb15bd 100644 --- a/Utility/Tmp.hs +++ b/Utility/Tmp.hs @@ -21,12 +21,12 @@ import System.IO import System.FilePath import System.Directory import Control.Monad.IO.Class -import System.PosixCompat.Files hiding (removeLink) import System.IO.Error import Utility.Exception import Utility.FileSystemEncoding import Utility.FileMode +import qualified Utility.RawFilePath as R type Template = String @@ -62,14 +62,15 @@ viaTmp a file content = bracketIO setup cleanup use _ <- tryIO $ hClose h tryIO $ removeFile tmpfile use (tmpfile, h) = do + let tmpfile' = toRawFilePath tmpfile -- Make mode the same as if the file were created usually, -- not as a temp file. (This may fail on some filesystems -- that don't support file modes well, so ignore -- exceptions.) - _ <- liftIO $ tryIO $ setFileMode tmpfile =<< defaultFileMode + _ <- liftIO $ tryIO $ R.setFileMode tmpfile' =<< defaultFileMode liftIO $ hClose h a tmpfile content - liftIO $ rename tmpfile file + liftIO $ R.rename tmpfile' (toRawFilePath file) {- Runs an action with a tmp file located in the system's tmp directory - (or in "." if there is none) then removes the file. -} diff --git a/Utility/Url/Parse.hs b/Utility/Url/Parse.hs new file mode 100644 index 0000000..7fc952b --- /dev/null +++ b/Utility/Url/Parse.hs @@ -0,0 +1,63 @@ +{- Url parsing. + - + - Copyright 2011-2023 Joey Hess + - + - License: BSD-2-clause + -} + +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} + +module Utility.Url.Parse ( + parseURIPortable, + parseURIRelaxed, +) where + +import Network.URI +#ifdef mingw32_HOST_OS +import qualified System.FilePath.Windows as PW +#endif + +{- On unix this is the same as parseURI. But on Windows, + - it can parse urls such as file:///C:/path/to/file + - parseURI normally parses that as a path /C:/path/to/file + - and this simply removes the excess leading slash when there is a + - drive letter after it. -} +parseURIPortable :: String -> Maybe URI +#ifndef mingw32_HOST_OS +parseURIPortable = parseURI +#else +parseURIPortable s + | "file:" `isPrefixOf` s = do + u <- parseURI s + return $ case PW.splitDirectories (uriPath u) of + (p:d:_) | all PW.isPathSeparator p && PW.isDrive d -> + u { uriPath = dropWhile PW.isPathSeparator (uriPath u) } + _ -> u + | otherwise = parseURI s +#endif + +{- Allows for spaces and other stuff in urls, properly escaping them. -} +parseURIRelaxed :: String -> Maybe URI +parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $ + parseURIPortable $ escapeURIString isAllowedInURI s + +{- Some characters like '[' are allowed in eg, the address of + - an uri, but cannot appear unescaped further along in the uri. + - This handles that, expensively, by successively escaping each character + - from the back of the url until the url parses. + -} +parseURIRelaxed' :: String -> Maybe URI +parseURIRelaxed' s = go [] (reverse s) + where + go back [] = parseURI back + go back (c:cs) = case parseURI (escapeURIString isAllowedInURI (reverse (c:cs)) ++ back) of + Just u -> Just u + Nothing -> go (escapeURIChar escapemore c ++ back) cs + + escapemore '[' = False + escapemore ']' = False + escapemore c = isAllowedInURI c diff --git a/Utility/UserInfo.hs b/Utility/UserInfo.hs index 17ce8db..827229d 100644 --- a/Utility/UserInfo.hs +++ b/Utility/UserInfo.hs @@ -19,31 +19,32 @@ import Utility.Exception #ifndef mingw32_HOST_OS import Utility.Data import Control.Applicative +import System.Posix.User +#if MIN_VERSION_unix(2,8,0) +import System.Posix.User.ByteString (UserEntry) +#endif #endif -import System.PosixCompat import Prelude {- Current user's home directory. - - getpwent will fail on LDAP or NIS, so use HOME if set. -} myHomeDir :: IO FilePath -myHomeDir = either giveup return =<< myVal env homeDirectory - where +myHomeDir = either giveup return =<< #ifndef mingw32_HOST_OS - env = ["HOME"] + myVal ["HOME"] homeDirectory #else - env = ["USERPROFILE", "HOME"] -- HOME is used in Cygwin + myVal ["USERPROFILE", "HOME"] -- HOME is used in Cygwin #endif {- Current user's user name. -} myUserName :: IO (Either String String) -myUserName = myVal env userName - where +myUserName = #ifndef mingw32_HOST_OS - env = ["USER", "LOGNAME"] + myVal ["USER", "LOGNAME"] userName #else - env = ["USERNAME", "USER", "LOGNAME"] + myVal ["USERNAME", "USER", "LOGNAME"] #endif myUserGecos :: IO (Maybe String) @@ -54,16 +55,20 @@ myUserGecos = return Nothing myUserGecos = eitherToMaybe <$> myVal [] userGecos #endif +#ifndef mingw32_HOST_OS myVal :: [String] -> (UserEntry -> String) -> IO (Either String String) myVal envvars extract = go envvars where go [] = either (const $ envnotset) (Right . extract) <$> get go (v:vs) = maybe (go vs) (return . Right) =<< getEnv v -#ifndef mingw32_HOST_OS -- This may throw an exception if the system doesn't have a -- passwd file etc; don't let it crash. get = tryNonAsync $ getUserEntryForID =<< getEffectiveUserID #else - get = return envnotset +myVal :: [String] -> IO (Either String String) +myVal envvars = go envvars + where + go [] = return envnotset + go (v:vs) = maybe (go vs) (return . Right) =<< getEnv v #endif envnotset = Left ("environment not set: " ++ show envvars) diff --git a/git-repair.cabal b/git-repair.cabal index c269fe7..371072e 100644 --- a/git-repair.cabal +++ b/git-repair.cabal @@ -1,5 +1,5 @@ Name: git-repair -Version: 1.20220404 +Version: 1.20230814 Cabal-Version: >= 1.10 License: AGPL-3 Maintainer: Joey Hess @@ -73,13 +73,13 @@ Executable git-repair Git.DiffTreeItem Git.Env Git.FilePath - Git.Filename Git.Fsck Git.HashObject Git.Index Git.LsFiles Git.LsTree Git.Objects + Git.Quote Git.Ref Git.RefLog Git.Remote @@ -120,10 +120,12 @@ Executable git-repair Utility.Percentage Utility.Process Utility.Process.Shim + Utility.Process.Transcript Utility.QuickCheck Utility.RawFilePath Utility.Rsync Utility.SafeCommand + Utility.SafeOutput Utility.SimpleProtocol Utility.Split Utility.SystemDirectory @@ -133,3 +135,4 @@ Executable git-repair Utility.Tmp.Dir Utility.Tuple Utility.UserInfo + Utility.Url.Parse -- cgit v1.2.3