From 0be4febab2f4fe102d2bd1bb9bf564ea0b5e1ee6 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 21 Nov 2013 13:44:56 -0400 Subject: improve findMissing git cat-file -p cannot be relied on to tell when an object is corrupt. If it fails, the fsck may not find all bad objects -- but as long as fsck exits nonzero, it will return a failing fsckresult, and so recovery will run. In recovery, the objects get unpacked. This allows the improved findMissing to find all corrupt loose objects when fsck is run again as part of the recovery. Removed the repack / prune-packed workaround that I added earlier to find corrupt loose objects that fsck wasn't finding. That was slow, and we want to keep all loose objects, so that findMissing will work. And, it's unncessary, now that findMissing is fixed. Also, fixed some places where unreadable files would crash recovery. --- Git/Fsck.hs | 24 +++++++++++++++++++++--- Git/Repair.hs | 43 +++++++++++++++++-------------------------- debian/control | 3 ++- git-repair.cabal | 9 ++++++++- 4 files changed, 48 insertions(+), 31 deletions(-) diff --git a/Git/Fsck.hs b/Git/Fsck.hs index 309f4bb..a3a6e77 100644 --- a/Git/Fsck.hs +++ b/Git/Fsck.hs @@ -17,9 +17,12 @@ import Common import Git import Git.Command import Git.Sha +import Git.Objects import Utility.Batch +import Utility.Hash import qualified Data.Set as S +import qualified Data.ByteString.Lazy as L type MissingObjects = S.Set Sha @@ -57,18 +60,33 @@ foundBroken (Just s) = not (S.null s) {- Finds objects that are missing from the git repsitory, or are corrupt. - - This does not use git cat-file --batch, because catting a corrupt - - object can cause it to crash, or to report incorrect size information. + - object can cause it to crash, or to report incorrect size information.a + - + - Note that git cat-file -p can succeed in printing out objects that + - are corrupt. Since its output may be a pretty-printed object, or may be + - a blob, it cannot be verified. So, this has false negatives. + - + - As a secondary check, if cat-file says the object is there, check if + - the loose object file is available, and if so, try taking its sha1 + - ourselves. This will always work once all packs have been unpacked. -} findMissing :: [Sha] -> Repo -> IO MissingObjects -findMissing objs r = S.fromList <$> filterM (not <$$> cancat) objs +findMissing objs r = S.fromList <$> filterM (not <$$> present) objs where - cancat o = either (const False) (const True) <$> tryIO (cat o) + present o = + either (const $ return False) (const $ verifyLooseObject o r) + =<< tryIO (cat o) cat o = runQuiet [ Param "cat-file" , Param "-p" , Param (show o) ] r +verifyLooseObject :: Sha -> Repo -> IO Bool +verifyLooseObject s r = do + sha <- sha1 <$> L.readFile (looseObjectFile r s) + return $ show sha == show s + findShas :: String -> [Sha] findShas = catMaybes . map extractSha . concat . map words . lines diff --git a/Git/Repair.hs b/Git/Repair.hs index 88150e4..3fb79df 100644 --- a/Git/Repair.hs +++ b/Git/Repair.hs @@ -78,26 +78,13 @@ cleanCorruptObjects mmissing r = check mmissing putStrLn "Re-running git fsck to see if it finds more problems." v <- findBroken False r case v of - Nothing - | numremoved > 0 -> do - hPutStrLn stderr $ unwords - [ "git fsck found a problem, which was not corrected after removing" - , show numremoved - , "corrupt objects." - ] - return Nothing - | otherwise -> do - hPutStrLn stderr "Repacking all objects, to try to flush out unknown corrupt ones." - void $ runBool - [ Param "repack" - , Param "-a" - ] r - void $ runBool - [ Param "prune-packed" - ] r - s <- S.fromList <$> listLooseObjectShas r - void $ removeLoose r s - retry (S.size s) S.empty + Nothing -> do + hPutStrLn stderr $ unwords + [ "git fsck found a problem, which was not corrected after removing" + , show numremoved + , "corrupt objects." + ] + return Nothing Just newbad -> do removed <- removeLoose r newbad let s = S.union oldbad newbad @@ -133,6 +120,7 @@ explodePacks r = do go packfile = withTmpFileIn (localGitDir r) "pack" $ \tmp _ -> do moveFile packfile tmp nukeFile $ packIdxFile packfile + allowRead tmp -- May fail, if pack file is corrupt. void $ tryIO $ pipeWrite [Param "unpack-objects", Param "-r"] r $ \h -> @@ -267,8 +255,7 @@ removeTrackingBranches missing goodcommits r = getAllRefs :: Repo -> IO [Ref] getAllRefs r = do packedrs <- mapMaybe parsePacked . lines - <$> catchDefaultIO "" - (readFileStrictAnyEncoding $ packedRefsFile r) + <$> catchDefaultIO "" (safeReadFile $ packedRefsFile r) loosers <- map toref <$> dirContentsRecursive refdir return $ packedrs ++ loosers where @@ -296,9 +283,9 @@ nukeBranchRef b r = void $ usegit <||> byhand ] r byhand = do nukeFile $ localGitDir r show b - whenM (doesFileExist packedrefs) $ + whenM (doesFileExist packedrefs) $ do withTmpFile "packed-refs" $ \tmp h -> do - ls <- lines <$> readFileStrictAnyEncoding packedrefs + ls <- lines <$> safeReadFile packedrefs hPutStr h $ unlines $ filter (not . skiprefline) ls hClose h @@ -476,8 +463,7 @@ displayList items header -} preRepair :: Repo -> IO () preRepair g = do - void $ tryIO $ allowRead headfile - unlessM (validhead <$> catchDefaultIO "" (readFileStrictAnyEncoding headfile)) $ do + unlessM (validhead <$> catchDefaultIO "" (safeReadFile headfile)) $ do nukeFile headfile writeFile headfile "ref: refs/heads/master" where @@ -596,3 +582,8 @@ runRepairOf fsckresult forced referencerepo g = do successfulRepair :: (Bool, MissingObjects, [Branch]) -> Bool successfulRepair = fst3 + +safeReadFile :: FilePath -> IO String +safeReadFile f = do + allowRead f + readFileStrictAnyEncoding f diff --git a/debian/control b/debian/control index bf14911..eb91d47 100644 --- a/debian/control +++ b/debian/control @@ -13,7 +13,8 @@ Build-Depends: libghc-unix-compat-dev, libghc-utf8-string-dev, libghc-async-dev, - libghc-optparse-applicative-dev + libghc-optparse-applicative-dev, + libghc-cryptohash-dev Maintainer: Joey Hess Standards-Version: 3.9.4 Vcs-Git: git://git-repair.branchable.com/ diff --git a/git-repair.cabal b/git-repair.cabal index 22edee1..2cdce40 100644 --- a/git-repair.cabal +++ b/git-repair.cabal @@ -17,17 +17,24 @@ Description: It is a complement to git fsck, which finds problems, but does not fix them. +Flag CryptoHash + Description: Enable use of cryptohash for checksumming + Executable git-repair Main-Is: git-repair.hs GHC-Options: -Wall -threaded Build-Depends: MissingH, hslogger, directory, filepath, containers, mtl, network, extensible-exceptions, unix-compat, bytestring, base >= 4.5, base < 5, IfElse, text, process, time, QuickCheck, - utf8-string, async, optparse-applicative + utf8-string, async, optparse-applicative, SHA if (! os(windows)) Build-Depends: unix + if flag(CryptoHash) + Build-Depends: cryptohash (>= 0.10.0) + CPP-Options: -DWITH_CRYPTOHASH + source-repository head type: git location: git://git-repair.branchable.com/ -- cgit v1.2.3