From 3c9630388ab0234df9e13473ac20c147e77074c5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 4 May 2022 11:36:52 -0400 Subject: Avoid treating refs that are not commit objects as evidence of repository corruption merged fix from git-annex, which actually has such a ref, refs/annex/last-index --- CHANGELOG | 7 +++++++ Git/Fsck.hs | 22 +++++++++++++++++----- Git/Repair.hs | 57 ++++++++++++++++++++++++++++++++++----------------------- git-repair.hs | 2 +- 4 files changed, 59 insertions(+), 29 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 6aa6e05..3d0ca96 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,10 @@ +git-repair (1.20210630) UNRELEASED; urgency=medium + + * Avoid treating refs that are not commit objects as evidence of + repository corruption. + + -- Joey Hess Wed, 04 May 2022 11:33:48 -0400 + git-repair (1.20210629) unstable; urgency=medium * Fixed bug that interrupting the program while it was fixing repository diff --git a/Git/Fsck.hs b/Git/Fsck.hs index 7440b92..4544c13 100644 --- a/Git/Fsck.hs +++ b/Git/Fsck.hs @@ -1,4 +1,5 @@ {- git fsck interface +i it is not fully repoducibleI repeated the same steps - - Copyright 2013 Joey Hess - @@ -69,9 +70,17 @@ instance Monoid FsckOutput where - look for anything in its output (both stdout and stderr) that appears - to be a git sha. Not all such shas are of broken objects, so ask git - to try to cat the object, and see if it fails. + - + - Note that there is a possible false positive: When changes are being + - made to the repo while this is running, fsck might complain about a + - missing object that has not made it to disk yet. Catting the object + - then succeeds, so it's not included in the FsckResults. But, fsck then + - exits nonzero, and so FsckFailed is returned. Set ignorenonzeroexit + - to avoid this false positive, at the risk of perhaps missing a problem + - so bad that fsck crashes without outputting any missing shas. -} -findBroken :: Bool -> Repo -> IO FsckResults -findBroken batchmode r = do +findBroken :: Bool -> Bool -> Repo -> IO FsckResults +findBroken batchmode ignorenonzeroexit r = do let (command, params) = ("git", fsckParams r) (command', params') <- if batchmode then toBatchCommand (command, params) @@ -90,10 +99,10 @@ findBroken batchmode r = do fsckok <- checkSuccessProcess pid case mappend o1 o2 of FsckOutput badobjs truncated - | S.null badobjs && not fsckok -> return FsckFailed + | S.null badobjs && not fsckok -> return fsckfailed | otherwise -> return $ FsckFoundMissing badobjs truncated NoFsckOutput - | not fsckok -> return FsckFailed + | not fsckok -> return fsckfailed | otherwise -> return noproblem -- If all fsck output was duplicateEntries warnings, -- the repository is not broken, it just has some @@ -104,6 +113,9 @@ findBroken batchmode r = do maxobjs = 10000 noproblem = FsckFoundMissing S.empty False + fsckfailed + | ignorenonzeroexit = noproblem + | otherwise = FsckFailed foundBroken :: FsckResults -> Bool foundBroken FsckFailed = True @@ -147,7 +159,7 @@ isMissing s r = either (const True) (const False) <$> tryIO dump ] r findShas :: [String] -> [Sha] -findShas = catMaybes . map (extractSha . encodeBS') +findShas = catMaybes . map (extractSha . encodeBS) . concat . map words . filter wanted where wanted l = not ("dangling " `isPrefixOf` l) diff --git a/Git/Repair.hs b/Git/Repair.hs index 144c96f..7d47f84 100644 --- a/Git/Repair.hs +++ b/Git/Repair.hs @@ -114,7 +114,7 @@ retrieveMissingObjects missing referencerepo r | not (foundBroken missing) = return missing | otherwise = withTmpDir "tmprepo" $ \tmpdir -> do unlessM (boolSystem "git" [Param "init", File tmpdir]) $ - error $ "failed to create temp repository in " ++ tmpdir + giveup $ "failed to create temp repository in " ++ tmpdir tmpr <- Config.read =<< Construct.fromPath (toRawFilePath tmpdir) let repoconfig r' = fromRawFilePath (localGitDir r' P. "config") whenM (doesFileExist (repoconfig r)) $ @@ -252,7 +252,7 @@ getAllRefs r = getAllRefs' (fromRawFilePath (localGitDir r) "refs") getAllRefs' :: FilePath -> IO [Ref] getAllRefs' refdir = do let topsegs = length (splitPath refdir) - 1 - let toref = Ref . encodeBS' . joinPath . drop topsegs . splitPath + let toref = Ref . encodeBS . joinPath . drop topsegs . splitPath map toref <$> dirContentsRecursive refdir explodePackedRefsFile :: Repo -> IO () @@ -279,8 +279,8 @@ packedRefsFile r = fromRawFilePath (localGitDir r) "packed-refs" parsePacked :: String -> Maybe (Sha, Ref) parsePacked l = case words l of (sha:ref:[]) - | isJust (extractSha (encodeBS' sha)) && Ref.legal True ref -> - Just (Ref (encodeBS' sha), Ref (encodeBS' ref)) + | isJust (extractSha (encodeBS sha)) && Ref.legal True ref -> + Just (Ref (encodeBS sha), Ref (encodeBS ref)) _ -> Nothing {- git-branch -d cannot be used to remove a branch that is directly @@ -325,7 +325,11 @@ findUncorruptedCommit missing goodcommits branch r = do - the commit. Also adds to a set of commit shas that have been verified to - be good, which can be passed into subsequent calls to avoid - redundant work when eg, chasing down branches to find the first - - uncorrupted commit. -} + - uncorrupted commit. + - + - When the sha is not a commit but some other git object, returns + - true, but does not add it to the set. + -} verifyCommit :: MissingObjects -> GoodCommits -> Sha -> Repo -> IO (Bool, GoodCommits) verifyCommit missing goodcommits commit r | checkGoodCommit commit goodcommits = return (True, goodcommits) @@ -337,21 +341,28 @@ verifyCommit missing goodcommits commit r , Param (fromRef commit) ] r let committrees = map (parse . decodeBL) ls - if any isNothing committrees || null committrees - then do - void cleanup - return (False, goodcommits) - else do - let cts = catMaybes committrees - ifM (cleanup <&&> check cts) - ( return (True, addGoodCommits (map fst cts) goodcommits) - , return (False, goodcommits) - ) + -- git log on an object that is not a commit will + -- succeed without any output + if null committrees + then ifM cleanup + ( return (True, goodcommits) + , return (False, goodcommits) + ) + else if any isNothing committrees + then do + void cleanup + return (False, goodcommits) + else do + let cts = catMaybes committrees + ifM (cleanup <&&> check cts) + ( return (True, addGoodCommits (map fst cts) goodcommits) + , return (False, goodcommits) + ) where parse l = case words l of (commitsha:treesha:[]) -> (,) - <$> extractSha (encodeBS' commitsha) - <*> extractSha (encodeBS' treesha) + <$> extractSha (encodeBS commitsha) + <*> extractSha (encodeBS treesha) _ -> Nothing check [] = return True check ((c, t):rest) @@ -469,14 +480,14 @@ preRepair g = do where headfile = localGitDir g P. "HEAD" validhead s = "ref: refs/" `isPrefixOf` s - || isJust (extractSha (encodeBS' s)) + || isJust (extractSha (encodeBS s)) {- Put it all together. -} runRepair :: (Ref -> Bool) -> Bool -> Repo -> IO (Bool, [Branch]) runRepair removablebranch forced g = do preRepair g putStrLn "Running git fsck ..." - fsckresult <- findBroken False g + fsckresult <- findBroken False False g if foundBroken fsckresult then do putStrLn "Fsck found problems, attempting repair." @@ -500,7 +511,7 @@ runRepairOf fsckresult removablebranch forced referencerepo g = do runRepair' :: (Ref -> Bool) -> FsckResults -> Bool -> Maybe FilePath -> Repo -> IO (Bool, [Branch]) runRepair' removablebranch fsckresult forced referencerepo g = do cleanCorruptObjects fsckresult g - missing <- findBroken False g + missing <- findBroken False False g stillmissing <- retrieveMissingObjects missing referencerepo g case stillmissing of FsckFoundMissing s t @@ -529,7 +540,7 @@ runRepair' removablebranch fsckresult forced referencerepo g = do | forced -> ifM (pure (repoIsLocalBare g) <||> checkIndex g) ( do cleanCorruptObjects FsckFailed g - stillmissing' <- findBroken False g + stillmissing' <- findBroken False False g case stillmissing' of FsckFailed -> return (False, []) FsckFoundMissing s t -> forcerepair s t @@ -575,7 +586,7 @@ runRepair' removablebranch fsckresult forced referencerepo g = do -- the repair process. if fscktruncated then do - fsckresult' <- findBroken False g + fsckresult' <- findBroken False False g case fsckresult' of FsckFailed -> do putStrLn "git fsck is failing" @@ -597,7 +608,7 @@ runRepair' removablebranch fsckresult forced referencerepo g = do removeWhenExistsWith R.removeLink (indexFile g) -- The corrupted index can prevent fsck from finding other -- problems, so re-run repair. - fsckresult' <- findBroken False g + fsckresult' <- findBroken False False g result <- runRepairOf fsckresult' removablebranch forced referencerepo g putStrLn "Removed the corrupted index file. You should look at what files are present in your working tree and git add them back to the index when appropriate." return result diff --git a/git-repair.hs b/git-repair.hs index 7ca1854..18721a9 100644 --- a/git-repair.hs +++ b/git-repair.hs @@ -100,7 +100,7 @@ runTest settings damage = withTmpDir "tmprepo" $ \tmpdir -> do case repairstatus of Just True -> testResult repairstatus . Just . not . Git.Fsck.foundBroken - =<< Git.Fsck.findBroken False g + =<< Git.Fsck.findBroken False False g _ -> testResult repairstatus Nothing -- Pass test result and fsck result -- cgit v1.2.3