summaryrefslogtreecommitdiff
path: root/Git/Fsck.hs
diff options
context:
space:
mode:
authorJoey Hess <joey@kitenet.net>2013-11-21 13:44:56 -0400
committerJoey Hess <joey@kitenet.net>2013-11-21 13:48:34 -0400
commit0be4febab2f4fe102d2bd1bb9bf564ea0b5e1ee6 (patch)
tree9a754244f9514923bfc5576b0fe031de06074023 /Git/Fsck.hs
parent8e67a1e418e51c3ddf4d4b100d6bd61211a1a60b (diff)
downloadgit-repair-0be4febab2f4fe102d2bd1bb9bf564ea0b5e1ee6.tar.gz
improve findMissing
git cat-file -p cannot be relied on to tell when an object is corrupt. If it fails, the fsck may not find all bad objects -- but as long as fsck exits nonzero, it will return a failing fsckresult, and so recovery will run. In recovery, the objects get unpacked. This allows the improved findMissing to find all corrupt loose objects when fsck is run again as part of the recovery. Removed the repack / prune-packed workaround that I added earlier to find corrupt loose objects that fsck wasn't finding. That was slow, and we want to keep all loose objects, so that findMissing will work. And, it's unncessary, now that findMissing is fixed. Also, fixed some places where unreadable files would crash recovery.
Diffstat (limited to 'Git/Fsck.hs')
-rw-r--r--Git/Fsck.hs24
1 files changed, 21 insertions, 3 deletions
diff --git a/Git/Fsck.hs b/Git/Fsck.hs
index 309f4bb..a3a6e77 100644
--- a/Git/Fsck.hs
+++ b/Git/Fsck.hs
@@ -17,9 +17,12 @@ import Common
import Git
import Git.Command
import Git.Sha
+import Git.Objects
import Utility.Batch
+import Utility.Hash
import qualified Data.Set as S
+import qualified Data.ByteString.Lazy as L
type MissingObjects = S.Set Sha
@@ -57,18 +60,33 @@ foundBroken (Just s) = not (S.null s)
{- Finds objects that are missing from the git repsitory, or are corrupt.
-
- This does not use git cat-file --batch, because catting a corrupt
- - object can cause it to crash, or to report incorrect size information.
+ - object can cause it to crash, or to report incorrect size information.a
+ -
+ - Note that git cat-file -p can succeed in printing out objects that
+ - are corrupt. Since its output may be a pretty-printed object, or may be
+ - a blob, it cannot be verified. So, this has false negatives.
+ -
+ - As a secondary check, if cat-file says the object is there, check if
+ - the loose object file is available, and if so, try taking its sha1
+ - ourselves. This will always work once all packs have been unpacked.
-}
findMissing :: [Sha] -> Repo -> IO MissingObjects
-findMissing objs r = S.fromList <$> filterM (not <$$> cancat) objs
+findMissing objs r = S.fromList <$> filterM (not <$$> present) objs
where
- cancat o = either (const False) (const True) <$> tryIO (cat o)
+ present o =
+ either (const $ return False) (const $ verifyLooseObject o r)
+ =<< tryIO (cat o)
cat o = runQuiet
[ Param "cat-file"
, Param "-p"
, Param (show o)
] r
+verifyLooseObject :: Sha -> Repo -> IO Bool
+verifyLooseObject s r = do
+ sha <- sha1 <$> L.readFile (looseObjectFile r s)
+ return $ show sha == show s
+
findShas :: String -> [Sha]
findShas = catMaybes . map extractSha . concat . map words . lines