summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoey Hess <joey@kitenet.net>2013-11-21 13:44:56 -0400
committerJoey Hess <joey@kitenet.net>2013-11-21 13:48:34 -0400
commit0be4febab2f4fe102d2bd1bb9bf564ea0b5e1ee6 (patch)
tree9a754244f9514923bfc5576b0fe031de06074023
parent8e67a1e418e51c3ddf4d4b100d6bd61211a1a60b (diff)
downloadgit-repair-0be4febab2f4fe102d2bd1bb9bf564ea0b5e1ee6.tar.gz
improve findMissing
git cat-file -p cannot be relied on to tell when an object is corrupt. If it fails, the fsck may not find all bad objects -- but as long as fsck exits nonzero, it will return a failing fsckresult, and so recovery will run. In recovery, the objects get unpacked. This allows the improved findMissing to find all corrupt loose objects when fsck is run again as part of the recovery. Removed the repack / prune-packed workaround that I added earlier to find corrupt loose objects that fsck wasn't finding. That was slow, and we want to keep all loose objects, so that findMissing will work. And, it's unncessary, now that findMissing is fixed. Also, fixed some places where unreadable files would crash recovery.
-rw-r--r--Git/Fsck.hs24
-rw-r--r--Git/Repair.hs43
-rw-r--r--debian/control3
-rw-r--r--git-repair.cabal9
4 files changed, 48 insertions, 31 deletions
diff --git a/Git/Fsck.hs b/Git/Fsck.hs
index 309f4bb..a3a6e77 100644
--- a/Git/Fsck.hs
+++ b/Git/Fsck.hs
@@ -17,9 +17,12 @@ import Common
import Git
import Git.Command
import Git.Sha
+import Git.Objects
import Utility.Batch
+import Utility.Hash
import qualified Data.Set as S
+import qualified Data.ByteString.Lazy as L
type MissingObjects = S.Set Sha
@@ -57,18 +60,33 @@ foundBroken (Just s) = not (S.null s)
{- Finds objects that are missing from the git repsitory, or are corrupt.
-
- This does not use git cat-file --batch, because catting a corrupt
- - object can cause it to crash, or to report incorrect size information.
+ - object can cause it to crash, or to report incorrect size information.a
+ -
+ - Note that git cat-file -p can succeed in printing out objects that
+ - are corrupt. Since its output may be a pretty-printed object, or may be
+ - a blob, it cannot be verified. So, this has false negatives.
+ -
+ - As a secondary check, if cat-file says the object is there, check if
+ - the loose object file is available, and if so, try taking its sha1
+ - ourselves. This will always work once all packs have been unpacked.
-}
findMissing :: [Sha] -> Repo -> IO MissingObjects
-findMissing objs r = S.fromList <$> filterM (not <$$> cancat) objs
+findMissing objs r = S.fromList <$> filterM (not <$$> present) objs
where
- cancat o = either (const False) (const True) <$> tryIO (cat o)
+ present o =
+ either (const $ return False) (const $ verifyLooseObject o r)
+ =<< tryIO (cat o)
cat o = runQuiet
[ Param "cat-file"
, Param "-p"
, Param (show o)
] r
+verifyLooseObject :: Sha -> Repo -> IO Bool
+verifyLooseObject s r = do
+ sha <- sha1 <$> L.readFile (looseObjectFile r s)
+ return $ show sha == show s
+
findShas :: String -> [Sha]
findShas = catMaybes . map extractSha . concat . map words . lines
diff --git a/Git/Repair.hs b/Git/Repair.hs
index 88150e4..3fb79df 100644
--- a/Git/Repair.hs
+++ b/Git/Repair.hs
@@ -78,26 +78,13 @@ cleanCorruptObjects mmissing r = check mmissing
putStrLn "Re-running git fsck to see if it finds more problems."
v <- findBroken False r
case v of
- Nothing
- | numremoved > 0 -> do
- hPutStrLn stderr $ unwords
- [ "git fsck found a problem, which was not corrected after removing"
- , show numremoved
- , "corrupt objects."
- ]
- return Nothing
- | otherwise -> do
- hPutStrLn stderr "Repacking all objects, to try to flush out unknown corrupt ones."
- void $ runBool
- [ Param "repack"
- , Param "-a"
- ] r
- void $ runBool
- [ Param "prune-packed"
- ] r
- s <- S.fromList <$> listLooseObjectShas r
- void $ removeLoose r s
- retry (S.size s) S.empty
+ Nothing -> do
+ hPutStrLn stderr $ unwords
+ [ "git fsck found a problem, which was not corrected after removing"
+ , show numremoved
+ , "corrupt objects."
+ ]
+ return Nothing
Just newbad -> do
removed <- removeLoose r newbad
let s = S.union oldbad newbad
@@ -133,6 +120,7 @@ explodePacks r = do
go packfile = withTmpFileIn (localGitDir r) "pack" $ \tmp _ -> do
moveFile packfile tmp
nukeFile $ packIdxFile packfile
+ allowRead tmp
-- May fail, if pack file is corrupt.
void $ tryIO $
pipeWrite [Param "unpack-objects", Param "-r"] r $ \h ->
@@ -267,8 +255,7 @@ removeTrackingBranches missing goodcommits r =
getAllRefs :: Repo -> IO [Ref]
getAllRefs r = do
packedrs <- mapMaybe parsePacked . lines
- <$> catchDefaultIO ""
- (readFileStrictAnyEncoding $ packedRefsFile r)
+ <$> catchDefaultIO "" (safeReadFile $ packedRefsFile r)
loosers <- map toref <$> dirContentsRecursive refdir
return $ packedrs ++ loosers
where
@@ -296,9 +283,9 @@ nukeBranchRef b r = void $ usegit <||> byhand
] r
byhand = do
nukeFile $ localGitDir r </> show b
- whenM (doesFileExist packedrefs) $
+ whenM (doesFileExist packedrefs) $ do
withTmpFile "packed-refs" $ \tmp h -> do
- ls <- lines <$> readFileStrictAnyEncoding packedrefs
+ ls <- lines <$> safeReadFile packedrefs
hPutStr h $ unlines $
filter (not . skiprefline) ls
hClose h
@@ -476,8 +463,7 @@ displayList items header
-}
preRepair :: Repo -> IO ()
preRepair g = do
- void $ tryIO $ allowRead headfile
- unlessM (validhead <$> catchDefaultIO "" (readFileStrictAnyEncoding headfile)) $ do
+ unlessM (validhead <$> catchDefaultIO "" (safeReadFile headfile)) $ do
nukeFile headfile
writeFile headfile "ref: refs/heads/master"
where
@@ -596,3 +582,8 @@ runRepairOf fsckresult forced referencerepo g = do
successfulRepair :: (Bool, MissingObjects, [Branch]) -> Bool
successfulRepair = fst3
+
+safeReadFile :: FilePath -> IO String
+safeReadFile f = do
+ allowRead f
+ readFileStrictAnyEncoding f
diff --git a/debian/control b/debian/control
index bf14911..eb91d47 100644
--- a/debian/control
+++ b/debian/control
@@ -13,7 +13,8 @@ Build-Depends:
libghc-unix-compat-dev,
libghc-utf8-string-dev,
libghc-async-dev,
- libghc-optparse-applicative-dev
+ libghc-optparse-applicative-dev,
+ libghc-cryptohash-dev
Maintainer: Joey Hess <joeyh@debian.org>
Standards-Version: 3.9.4
Vcs-Git: git://git-repair.branchable.com/
diff --git a/git-repair.cabal b/git-repair.cabal
index 22edee1..2cdce40 100644
--- a/git-repair.cabal
+++ b/git-repair.cabal
@@ -17,17 +17,24 @@ Description:
It is a complement to git fsck, which finds problems, but does not fix
them.
+Flag CryptoHash
+ Description: Enable use of cryptohash for checksumming
+
Executable git-repair
Main-Is: git-repair.hs
GHC-Options: -Wall -threaded
Build-Depends: MissingH, hslogger, directory, filepath, containers, mtl,
network, extensible-exceptions, unix-compat, bytestring,
base >= 4.5, base < 5, IfElse, text, process, time, QuickCheck,
- utf8-string, async, optparse-applicative
+ utf8-string, async, optparse-applicative, SHA
if (! os(windows))
Build-Depends: unix
+ if flag(CryptoHash)
+ Build-Depends: cryptohash (>= 0.10.0)
+ CPP-Options: -DWITH_CRYPTOHASH
+
source-repository head
type: git
location: git://git-repair.branchable.com/