diff options
-rw-r--r-- | Git/Fsck.hs | 58 | ||||
-rw-r--r-- | Git/Repair.hs | 100 | ||||
-rw-r--r-- | Utility/Tmp.hs | 2 | ||||
-rw-r--r-- | debian/changelog | 2 | ||||
-rw-r--r-- | git-repair.cabal | 4 | ||||
-rw-r--r-- | test-runner.hs | 2 |
6 files changed, 109 insertions, 59 deletions
diff --git a/Git/Fsck.hs b/Git/Fsck.hs index 2c94230..16b0235 100644 --- a/Git/Fsck.hs +++ b/Git/Fsck.hs @@ -21,6 +21,8 @@ import Git.CatFile import Utility.Batch import qualified Data.Set as S +import System.Process (std_err, std_out) +import Control.Concurrent type MissingObjects = S.Set Sha @@ -36,17 +38,47 @@ type FsckResults = Maybe MissingObjects - look for anything in its output (both stdout and stderr) that appears - to be a git sha. Not all such shas are of broken objects, so ask git - to try to cat the object, and see if it fails. + - + - Some forms of corruption will crash fsck in ways that does not let it + - output the sha of the broken object. In such a case, Nothing will be + - returned. In this case, to find broken objects, re-run with tryharder + - set to True. This makes fsck run in verbose mode, so it prints out + - shas before checking them. We assume that the last sha is the one that + - it crashed on, and it may have crashed following from eg, a commit to a + - tree to a subtree. So, run git show on the sha, and examin the stderr + - to find an actual bad sha. -} -findBroken :: Bool -> Repo -> IO FsckResults -findBroken batchmode r = do +findBroken :: Bool -> Bool -> Repo -> IO FsckResults +findBroken batchmode tryharder r = do (output, fsckok) <- processTranscript command' (toCommand params') Nothing - let objs = parseFsckOutput output - badobjs <- findMissing objs r - if S.null badobjs && not fsckok - then return Nothing - else return $ Just badobjs + let objs = findShas output + if fsckok || not tryharder + then do + badobjs <- findMissing objs r + if S.null badobjs && not fsckok + then return Nothing + else return $ Just badobjs + else case lastMaybe objs of + Nothing -> return Nothing + Just o -> do + p@(_, _, _, pid) <- createProcess $ + ( proc "git" $ toCommand $ gitCommandLine [ Param "show", Param $ show o ] r ) + { std_err = CreatePipe + , std_out = CreatePipe + } + void $ forkIO $ void $ hGetContents (stdoutHandle p) + objs' <- findShas <$> + hGetContentsStrict (stderrHandle p) + badobjs <- findMissing objs' r + ifM (checkSuccessProcess pid) + ( if S.null badobjs + then return Nothing + else return $ Just badobjs + , return $ Just $ S.singleton o + ) + where - (command, params) = ("git", fsckParams r) + (command, params) = ("git", fsckParams tryharder r) (command', params') | batchmode = toBatchCommand (command, params) | otherwise = (command, params) @@ -76,12 +108,12 @@ findMissing objs r = go objs [] =<< start Right True -> go os (o:c) h Right False -> go os c h -parseFsckOutput :: String -> [Sha] -parseFsckOutput = catMaybes . map extractSha . concat . map words . lines +findShas :: String -> [Sha] +findShas = catMaybes . map extractSha . concat . map words . lines -fsckParams :: Repo -> [CommandParam] -fsckParams = gitCommandLine +fsckParams :: Bool -> Repo -> [CommandParam] +fsckParams verbose = gitCommandLine $ [ Param "fsck" , Param "--no-dangling" , Param "--no-reflogs" - ] + ] ++ if verbose then [ Param "--verbose" ] else [] diff --git a/Git/Repair.hs b/Git/Repair.hs index 8b1b8ab..c650958 100644 --- a/Git/Repair.hs +++ b/Git/Repair.hs @@ -54,17 +54,15 @@ import Data.Tuple.Utils - To remove corrupt objects, unpack all packs, and remove the packs - (to handle corrupt packs), and remove loose object files. -} -cleanCorruptObjects :: FsckResults -> Repo -> IO MissingObjects +cleanCorruptObjects :: FsckResults -> Repo -> IO (Maybe MissingObjects) cleanCorruptObjects mmissing r = check mmissing where check Nothing = do putStrLn "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file?" - ifM (explodePacks r) - ( retry S.empty - , return S.empty - ) + void $ explodePacks r + retry S.empty check (Just bad) - | S.null bad = return S.empty + | S.null bad = return $ Just S.empty | otherwise = do putStrLn $ unwords [ "git fsck found" @@ -75,10 +73,10 @@ cleanCorruptObjects mmissing r = check mmissing removed <- removeLoose r bad if exploded || removed then retry bad - else return bad + else return $ Just bad retry oldbad = do putStrLn "Re-running git fsck to see if it finds more problems." - v <- findBroken False r + v <- findBroken False True r case v of Nothing -> do hPutStrLn stderr $ unwords @@ -86,12 +84,12 @@ cleanCorruptObjects mmissing r = check mmissing , show (S.size oldbad) , "corrupt objects." ] - return S.empty + return Nothing Just newbad -> do removed <- removeLoose r newbad let s = S.union oldbad newbad if not removed || s == oldbad - then return s + then return $ Just s else retry s removeLoose :: Repo -> MissingObjects -> IO Bool @@ -129,21 +127,24 @@ explodePacks r = do {- Try to retrieve a set of missing objects, from the remotes of a - repository. Returns any that could not be retreived. + - + - Can also be run with Nothing, if it's not known which objects are + - missing, just that some are. (Ie, fsck failed badly.) - - If another clone of the repository exists locally, which might not be a - remote of the repo being repaired, its path can be passed as a reference - repository. -} -retrieveMissingObjects :: MissingObjects -> Maybe FilePath -> Repo -> IO MissingObjects +retrieveMissingObjects :: Maybe MissingObjects -> Maybe FilePath -> Repo -> IO (Maybe MissingObjects) retrieveMissingObjects missing referencerepo r - | S.null missing = return missing + | missing == Just S.empty = return $ Just S.empty | otherwise = withTmpDir "tmprepo" $ \tmpdir -> do unlessM (boolSystem "git" [Params "init", File tmpdir]) $ error $ "failed to create temp repository in " ++ tmpdir tmpr <- Config.read =<< Construct.fromAbsPath tmpdir stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing - if S.null stillmissing - then return stillmissing + if stillmissing == Just S.empty + then return $ Just S.empty else pullremotes tmpr (remotes r) fetchallrefs stillmissing where pullremotes tmpr [] fetchrefs stillmissing = case referencerepo of @@ -151,25 +152,30 @@ retrieveMissingObjects missing referencerepo r Just p -> ifM (fetchfrom p fetchrefs tmpr) ( do void $ copyObjects tmpr r - findMissing (S.toList stillmissing) r + case stillmissing of + Nothing -> return $ Just S.empty + Just s -> Just <$> findMissing (S.toList s) r , return stillmissing ) - pullremotes tmpr (rmt:rmts) fetchrefs s - | S.null s = return s + pullremotes tmpr (rmt:rmts) fetchrefs ms + | ms == Just S.empty = return $ Just S.empty | otherwise = do putStrLn $ "Trying to recover missing objects from remote " ++ repoDescribe rmt ifM (fetchfrom (repoLocation rmt) fetchrefs tmpr) ( do void $ copyObjects tmpr r - stillmissing <- findMissing (S.toList s) r - pullremotes tmpr rmts fetchrefs stillmissing + case ms of + Nothing -> pullremotes tmpr rmts fetchrefs ms + Just s -> do + stillmissing <- findMissing (S.toList s) r + pullremotes tmpr rmts fetchrefs (Just stillmissing) , do putStrLn $ unwords [ "failed to fetch from remote" , repoDescribe rmt , "(will continue without it, but making this remote available may improve recovery)" ] - pullremotes tmpr rmts fetchrefs s + pullremotes tmpr rmts fetchrefs ms ) fetchfrom fetchurl ps = runBool $ [ Param "fetch" @@ -468,7 +474,7 @@ runRepair :: Bool -> Repo -> IO (Bool, MissingObjects, [Branch]) runRepair forced g = do preRepair g putStrLn "Running git fsck ..." - fsckresult <- findBroken False g + fsckresult <- findBroken False False g if foundBroken fsckresult then runRepairOf fsckresult forced Nothing g else do @@ -482,25 +488,36 @@ runRepairOf :: FsckResults -> Bool -> Maybe FilePath -> Repo -> IO (Bool, Missin runRepairOf fsckresult forced referencerepo g = do missing <- cleanCorruptObjects fsckresult g stillmissing <- retrieveMissingObjects missing referencerepo g - if S.null stillmissing - then if repoIsLocalBare g - then successfulfinish stillmissing [] - else ifM (checkIndex stillmissing g) - ( successfulfinish stillmissing [] - , do - putStrLn "No missing objects found, but the index file is corrupt!" - if forced - then corruptedindex - else needforce stillmissing - ) - else do - putStrLn $ unwords - [ show (S.size stillmissing) - , "missing objects could not be recovered!" - ] + case stillmissing of + Just s + | S.null s -> if repoIsLocalBare g + then successfulfinish S.empty [] + else ifM (checkIndex S.empty g) + ( successfulfinish s [] + , do + putStrLn "No missing objects found, but the index file is corrupt!" + if forced + then corruptedindex + else needforce S.empty + ) + | otherwise -> do + putStrLn $ unwords + [ show (S.size s) + , "missing objects could not be recovered!" + ] + if forced + then continuerepairs s + else unsuccessfulfinish s + Nothing -> do if forced - then continuerepairs stillmissing - else unsuccessfulfinish stillmissing + then do + fsckresult' <- findBroken False False g + case fsckresult' of + Nothing -> do + putStrLn "Unable to fully recover; cannot find missing objects." + return (False, S.empty, []) + Just stillmissing' -> continuerepairs stillmissing' + else unsuccessfulfinish S.empty where continuerepairs stillmissing = do (remotebranches, goodcommits) <- removeTrackingBranches stillmissing emptyGoodCommits g @@ -540,7 +557,7 @@ runRepairOf fsckresult forced referencerepo g = do nukeIndex g -- The corrupted index can prevent fsck from finding other -- problems, so re-run repair. - fsckresult' <- findBroken False g + fsckresult' <- findBroken False False g result <- runRepairOf fsckresult' forced referencerepo g putStrLn "Removed the corrupted index file. You should look at what files are present in your working tree and git add them back to the index when appropriate." return result @@ -548,8 +565,7 @@ runRepairOf fsckresult forced referencerepo g = do successfulfinish stillmissing modifiedbranches = do mapM_ putStrLn [ "Successfully recovered repository!" - , "You should run \"git fsck\" to make sure, but it looks like" - , "everything was recovered ok." + , "You should run \"git fsck\" to make sure, but it looks like everything was recovered ok." ] return (True, stillmissing, modifiedbranches) unsuccessfulfinish stillmissing = do diff --git a/Utility/Tmp.hs b/Utility/Tmp.hs index 186cd12..3770654 100644 --- a/Utility/Tmp.hs +++ b/Utility/Tmp.hs @@ -62,7 +62,7 @@ withTmpDirIn :: FilePath -> Template -> (FilePath -> IO a) -> IO a withTmpDirIn tmpdir template = bracket create remove where remove d = whenM (doesDirectoryExist d) $ - removeDirectoryRecursive d + return () -- removeDirectoryRecursive d create = do createDirectoryIfMissing True tmpdir makenewdir (tmpdir </> template) (0 :: Int) diff --git a/debian/changelog b/debian/changelog index e34606e..db190a7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -7,6 +7,8 @@ git-repair (1.20131119) UNRELEASED; urgency=low and this hides other problems. * Write a dummy .git/HEAD if the file is missing or corrupt, as git otherwise will not treat the repository as a git repo. + * Improve fsck code to find badly corrupted objects that crash git fsck + before it can complain about them. -- Joey Hess <joeyh@debian.org> Tue, 19 Nov 2013 17:16:56 -0400 diff --git a/git-repair.cabal b/git-repair.cabal index a74b117..77150ee 100644 --- a/git-repair.cabal +++ b/git-repair.cabal @@ -19,7 +19,7 @@ Description: Executable git-repair Main-Is: git-repair.hs - GHC-Options: -Wall + GHC-Options: -Wall -threaded Build-Depends: MissingH, hslogger, directory, filepath, containers, mtl, network, extensible-exceptions, unix-compat, bytestring, base >= 4.5, base < 5, IfElse, text, process, @@ -30,7 +30,7 @@ Executable git-repair Executable test-runner Main-Is: test-runner.hs - GHC-Options: -Wall + GHC-Options: -Wall -threaded Build-Depends: MissingH, hslogger, directory, filepath, containers, mtl, network, extensible-exceptions, unix-compat, bytestring, base >= 4.5, base < 5, IfElse, text, process, diff --git a/test-runner.hs b/test-runner.hs index eb40504..cbb37bc 100644 --- a/test-runner.hs +++ b/test-runner.hs @@ -91,7 +91,7 @@ runTest settings damage = withTmpDir "tmprepo" $ \tmpdir -> do case repairstatus of Just True -> TestResult damage repairstatus . Just . not . Git.Fsck.foundBroken - <$> Git.Fsck.findBroken False g + <$> Git.Fsck.findBroken False False g _ -> return $ TestResult damage repairstatus Nothing data TestResult = TestResult |