From 0be4febab2f4fe102d2bd1bb9bf564ea0b5e1ee6 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 21 Nov 2013 13:44:56 -0400 Subject: improve findMissing git cat-file -p cannot be relied on to tell when an object is corrupt. If it fails, the fsck may not find all bad objects -- but as long as fsck exits nonzero, it will return a failing fsckresult, and so recovery will run. In recovery, the objects get unpacked. This allows the improved findMissing to find all corrupt loose objects when fsck is run again as part of the recovery. Removed the repack / prune-packed workaround that I added earlier to find corrupt loose objects that fsck wasn't finding. That was slow, and we want to keep all loose objects, so that findMissing will work. And, it's unncessary, now that findMissing is fixed. Also, fixed some places where unreadable files would crash recovery. --- Git/Fsck.hs | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'Git/Fsck.hs') diff --git a/Git/Fsck.hs b/Git/Fsck.hs index 309f4bb..a3a6e77 100644 --- a/Git/Fsck.hs +++ b/Git/Fsck.hs @@ -17,9 +17,12 @@ import Common import Git import Git.Command import Git.Sha +import Git.Objects import Utility.Batch +import Utility.Hash import qualified Data.Set as S +import qualified Data.ByteString.Lazy as L type MissingObjects = S.Set Sha @@ -57,18 +60,33 @@ foundBroken (Just s) = not (S.null s) {- Finds objects that are missing from the git repsitory, or are corrupt. - - This does not use git cat-file --batch, because catting a corrupt - - object can cause it to crash, or to report incorrect size information. + - object can cause it to crash, or to report incorrect size information.a + - + - Note that git cat-file -p can succeed in printing out objects that + - are corrupt. Since its output may be a pretty-printed object, or may be + - a blob, it cannot be verified. So, this has false negatives. + - + - As a secondary check, if cat-file says the object is there, check if + - the loose object file is available, and if so, try taking its sha1 + - ourselves. This will always work once all packs have been unpacked. -} findMissing :: [Sha] -> Repo -> IO MissingObjects -findMissing objs r = S.fromList <$> filterM (not <$$> cancat) objs +findMissing objs r = S.fromList <$> filterM (not <$$> present) objs where - cancat o = either (const False) (const True) <$> tryIO (cat o) + present o = + either (const $ return False) (const $ verifyLooseObject o r) + =<< tryIO (cat o) cat o = runQuiet [ Param "cat-file" , Param "-p" , Param (show o) ] r +verifyLooseObject :: Sha -> Repo -> IO Bool +verifyLooseObject s r = do + sha <- sha1 <$> L.readFile (looseObjectFile r s) + return $ show sha == show s + findShas :: String -> [Sha] findShas = catMaybes . map extractSha . concat . map words . lines -- cgit v1.2.3