summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoey Hess <joey@kitenet.net>2013-11-20 15:14:20 -0400
committerJoey Hess <joey@kitenet.net>2013-11-20 15:14:20 -0400
commit6d67245728bbbc07ad1eeaf5b3c49f64c6bbcd11 (patch)
treefb9fbeb6820e46eef37270a60704947e5aad4bed
parentc2cd79e09b5aad1d634c58f5a130654649e25c49 (diff)
downloadgit-repair-6d67245728bbbc07ad1eeaf5b3c49f64c6bbcd11.tar.gz
try to recover even if git fsck cannot be coaxed to tell us any bad objects
Sometimes git fsck outputs no shas even with --verbose, but fails, due to badly corrupt objects. The best thing to do in this situation is to try to pull and rsync from remotes, hoping that the bad objects will be overwritten.
-rw-r--r--Git/Fsck.hs58
-rw-r--r--Git/Repair.hs100
-rw-r--r--Utility/Tmp.hs2
-rw-r--r--debian/changelog2
-rw-r--r--git-repair.cabal4
-rw-r--r--test-runner.hs2
6 files changed, 109 insertions, 59 deletions
diff --git a/Git/Fsck.hs b/Git/Fsck.hs
index 2c94230..16b0235 100644
--- a/Git/Fsck.hs
+++ b/Git/Fsck.hs
@@ -21,6 +21,8 @@ import Git.CatFile
import Utility.Batch
import qualified Data.Set as S
+import System.Process (std_err, std_out)
+import Control.Concurrent
type MissingObjects = S.Set Sha
@@ -36,17 +38,47 @@ type FsckResults = Maybe MissingObjects
- look for anything in its output (both stdout and stderr) that appears
- to be a git sha. Not all such shas are of broken objects, so ask git
- to try to cat the object, and see if it fails.
+ -
+ - Some forms of corruption will crash fsck in ways that does not let it
+ - output the sha of the broken object. In such a case, Nothing will be
+ - returned. In this case, to find broken objects, re-run with tryharder
+ - set to True. This makes fsck run in verbose mode, so it prints out
+ - shas before checking them. We assume that the last sha is the one that
+ - it crashed on, and it may have crashed following from eg, a commit to a
+ - tree to a subtree. So, run git show on the sha, and examin the stderr
+ - to find an actual bad sha.
-}
-findBroken :: Bool -> Repo -> IO FsckResults
-findBroken batchmode r = do
+findBroken :: Bool -> Bool -> Repo -> IO FsckResults
+findBroken batchmode tryharder r = do
(output, fsckok) <- processTranscript command' (toCommand params') Nothing
- let objs = parseFsckOutput output
- badobjs <- findMissing objs r
- if S.null badobjs && not fsckok
- then return Nothing
- else return $ Just badobjs
+ let objs = findShas output
+ if fsckok || not tryharder
+ then do
+ badobjs <- findMissing objs r
+ if S.null badobjs && not fsckok
+ then return Nothing
+ else return $ Just badobjs
+ else case lastMaybe objs of
+ Nothing -> return Nothing
+ Just o -> do
+ p@(_, _, _, pid) <- createProcess $
+ ( proc "git" $ toCommand $ gitCommandLine [ Param "show", Param $ show o ] r )
+ { std_err = CreatePipe
+ , std_out = CreatePipe
+ }
+ void $ forkIO $ void $ hGetContents (stdoutHandle p)
+ objs' <- findShas <$>
+ hGetContentsStrict (stderrHandle p)
+ badobjs <- findMissing objs' r
+ ifM (checkSuccessProcess pid)
+ ( if S.null badobjs
+ then return Nothing
+ else return $ Just badobjs
+ , return $ Just $ S.singleton o
+ )
+
where
- (command, params) = ("git", fsckParams r)
+ (command, params) = ("git", fsckParams tryharder r)
(command', params')
| batchmode = toBatchCommand (command, params)
| otherwise = (command, params)
@@ -76,12 +108,12 @@ findMissing objs r = go objs [] =<< start
Right True -> go os (o:c) h
Right False -> go os c h
-parseFsckOutput :: String -> [Sha]
-parseFsckOutput = catMaybes . map extractSha . concat . map words . lines
+findShas :: String -> [Sha]
+findShas = catMaybes . map extractSha . concat . map words . lines
-fsckParams :: Repo -> [CommandParam]
-fsckParams = gitCommandLine
+fsckParams :: Bool -> Repo -> [CommandParam]
+fsckParams verbose = gitCommandLine $
[ Param "fsck"
, Param "--no-dangling"
, Param "--no-reflogs"
- ]
+ ] ++ if verbose then [ Param "--verbose" ] else []
diff --git a/Git/Repair.hs b/Git/Repair.hs
index 8b1b8ab..c650958 100644
--- a/Git/Repair.hs
+++ b/Git/Repair.hs
@@ -54,17 +54,15 @@ import Data.Tuple.Utils
- To remove corrupt objects, unpack all packs, and remove the packs
- (to handle corrupt packs), and remove loose object files.
-}
-cleanCorruptObjects :: FsckResults -> Repo -> IO MissingObjects
+cleanCorruptObjects :: FsckResults -> Repo -> IO (Maybe MissingObjects)
cleanCorruptObjects mmissing r = check mmissing
where
check Nothing = do
putStrLn "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file?"
- ifM (explodePacks r)
- ( retry S.empty
- , return S.empty
- )
+ void $ explodePacks r
+ retry S.empty
check (Just bad)
- | S.null bad = return S.empty
+ | S.null bad = return $ Just S.empty
| otherwise = do
putStrLn $ unwords
[ "git fsck found"
@@ -75,10 +73,10 @@ cleanCorruptObjects mmissing r = check mmissing
removed <- removeLoose r bad
if exploded || removed
then retry bad
- else return bad
+ else return $ Just bad
retry oldbad = do
putStrLn "Re-running git fsck to see if it finds more problems."
- v <- findBroken False r
+ v <- findBroken False True r
case v of
Nothing -> do
hPutStrLn stderr $ unwords
@@ -86,12 +84,12 @@ cleanCorruptObjects mmissing r = check mmissing
, show (S.size oldbad)
, "corrupt objects."
]
- return S.empty
+ return Nothing
Just newbad -> do
removed <- removeLoose r newbad
let s = S.union oldbad newbad
if not removed || s == oldbad
- then return s
+ then return $ Just s
else retry s
removeLoose :: Repo -> MissingObjects -> IO Bool
@@ -129,21 +127,24 @@ explodePacks r = do
{- Try to retrieve a set of missing objects, from the remotes of a
- repository. Returns any that could not be retreived.
+ -
+ - Can also be run with Nothing, if it's not known which objects are
+ - missing, just that some are. (Ie, fsck failed badly.)
-
- If another clone of the repository exists locally, which might not be a
- remote of the repo being repaired, its path can be passed as a reference
- repository.
-}
-retrieveMissingObjects :: MissingObjects -> Maybe FilePath -> Repo -> IO MissingObjects
+retrieveMissingObjects :: Maybe MissingObjects -> Maybe FilePath -> Repo -> IO (Maybe MissingObjects)
retrieveMissingObjects missing referencerepo r
- | S.null missing = return missing
+ | missing == Just S.empty = return $ Just S.empty
| otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
unlessM (boolSystem "git" [Params "init", File tmpdir]) $
error $ "failed to create temp repository in " ++ tmpdir
tmpr <- Config.read =<< Construct.fromAbsPath tmpdir
stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing
- if S.null stillmissing
- then return stillmissing
+ if stillmissing == Just S.empty
+ then return $ Just S.empty
else pullremotes tmpr (remotes r) fetchallrefs stillmissing
where
pullremotes tmpr [] fetchrefs stillmissing = case referencerepo of
@@ -151,25 +152,30 @@ retrieveMissingObjects missing referencerepo r
Just p -> ifM (fetchfrom p fetchrefs tmpr)
( do
void $ copyObjects tmpr r
- findMissing (S.toList stillmissing) r
+ case stillmissing of
+ Nothing -> return $ Just S.empty
+ Just s -> Just <$> findMissing (S.toList s) r
, return stillmissing
)
- pullremotes tmpr (rmt:rmts) fetchrefs s
- | S.null s = return s
+ pullremotes tmpr (rmt:rmts) fetchrefs ms
+ | ms == Just S.empty = return $ Just S.empty
| otherwise = do
putStrLn $ "Trying to recover missing objects from remote " ++ repoDescribe rmt
ifM (fetchfrom (repoLocation rmt) fetchrefs tmpr)
( do
void $ copyObjects tmpr r
- stillmissing <- findMissing (S.toList s) r
- pullremotes tmpr rmts fetchrefs stillmissing
+ case ms of
+ Nothing -> pullremotes tmpr rmts fetchrefs ms
+ Just s -> do
+ stillmissing <- findMissing (S.toList s) r
+ pullremotes tmpr rmts fetchrefs (Just stillmissing)
, do
putStrLn $ unwords
[ "failed to fetch from remote"
, repoDescribe rmt
, "(will continue without it, but making this remote available may improve recovery)"
]
- pullremotes tmpr rmts fetchrefs s
+ pullremotes tmpr rmts fetchrefs ms
)
fetchfrom fetchurl ps = runBool $
[ Param "fetch"
@@ -468,7 +474,7 @@ runRepair :: Bool -> Repo -> IO (Bool, MissingObjects, [Branch])
runRepair forced g = do
preRepair g
putStrLn "Running git fsck ..."
- fsckresult <- findBroken False g
+ fsckresult <- findBroken False False g
if foundBroken fsckresult
then runRepairOf fsckresult forced Nothing g
else do
@@ -482,25 +488,36 @@ runRepairOf :: FsckResults -> Bool -> Maybe FilePath -> Repo -> IO (Bool, Missin
runRepairOf fsckresult forced referencerepo g = do
missing <- cleanCorruptObjects fsckresult g
stillmissing <- retrieveMissingObjects missing referencerepo g
- if S.null stillmissing
- then if repoIsLocalBare g
- then successfulfinish stillmissing []
- else ifM (checkIndex stillmissing g)
- ( successfulfinish stillmissing []
- , do
- putStrLn "No missing objects found, but the index file is corrupt!"
- if forced
- then corruptedindex
- else needforce stillmissing
- )
- else do
- putStrLn $ unwords
- [ show (S.size stillmissing)
- , "missing objects could not be recovered!"
- ]
+ case stillmissing of
+ Just s
+ | S.null s -> if repoIsLocalBare g
+ then successfulfinish S.empty []
+ else ifM (checkIndex S.empty g)
+ ( successfulfinish s []
+ , do
+ putStrLn "No missing objects found, but the index file is corrupt!"
+ if forced
+ then corruptedindex
+ else needforce S.empty
+ )
+ | otherwise -> do
+ putStrLn $ unwords
+ [ show (S.size s)
+ , "missing objects could not be recovered!"
+ ]
+ if forced
+ then continuerepairs s
+ else unsuccessfulfinish s
+ Nothing -> do
if forced
- then continuerepairs stillmissing
- else unsuccessfulfinish stillmissing
+ then do
+ fsckresult' <- findBroken False False g
+ case fsckresult' of
+ Nothing -> do
+ putStrLn "Unable to fully recover; cannot find missing objects."
+ return (False, S.empty, [])
+ Just stillmissing' -> continuerepairs stillmissing'
+ else unsuccessfulfinish S.empty
where
continuerepairs stillmissing = do
(remotebranches, goodcommits) <- removeTrackingBranches stillmissing emptyGoodCommits g
@@ -540,7 +557,7 @@ runRepairOf fsckresult forced referencerepo g = do
nukeIndex g
-- The corrupted index can prevent fsck from finding other
-- problems, so re-run repair.
- fsckresult' <- findBroken False g
+ fsckresult' <- findBroken False False g
result <- runRepairOf fsckresult' forced referencerepo g
putStrLn "Removed the corrupted index file. You should look at what files are present in your working tree and git add them back to the index when appropriate."
return result
@@ -548,8 +565,7 @@ runRepairOf fsckresult forced referencerepo g = do
successfulfinish stillmissing modifiedbranches = do
mapM_ putStrLn
[ "Successfully recovered repository!"
- , "You should run \"git fsck\" to make sure, but it looks like"
- , "everything was recovered ok."
+ , "You should run \"git fsck\" to make sure, but it looks like everything was recovered ok."
]
return (True, stillmissing, modifiedbranches)
unsuccessfulfinish stillmissing = do
diff --git a/Utility/Tmp.hs b/Utility/Tmp.hs
index 186cd12..3770654 100644
--- a/Utility/Tmp.hs
+++ b/Utility/Tmp.hs
@@ -62,7 +62,7 @@ withTmpDirIn :: FilePath -> Template -> (FilePath -> IO a) -> IO a
withTmpDirIn tmpdir template = bracket create remove
where
remove d = whenM (doesDirectoryExist d) $
- removeDirectoryRecursive d
+ return () -- removeDirectoryRecursive d
create = do
createDirectoryIfMissing True tmpdir
makenewdir (tmpdir </> template) (0 :: Int)
diff --git a/debian/changelog b/debian/changelog
index e34606e..db190a7 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -7,6 +7,8 @@ git-repair (1.20131119) UNRELEASED; urgency=low
and this hides other problems.
* Write a dummy .git/HEAD if the file is missing or corrupt, as
git otherwise will not treat the repository as a git repo.
+ * Improve fsck code to find badly corrupted objects that crash git fsck
+ before it can complain about them.
-- Joey Hess <joeyh@debian.org> Tue, 19 Nov 2013 17:16:56 -0400
diff --git a/git-repair.cabal b/git-repair.cabal
index a74b117..77150ee 100644
--- a/git-repair.cabal
+++ b/git-repair.cabal
@@ -19,7 +19,7 @@ Description:
Executable git-repair
Main-Is: git-repair.hs
- GHC-Options: -Wall
+ GHC-Options: -Wall -threaded
Build-Depends: MissingH, hslogger, directory, filepath, containers, mtl,
network, extensible-exceptions, unix-compat, bytestring,
base >= 4.5, base < 5, IfElse, text, process,
@@ -30,7 +30,7 @@ Executable git-repair
Executable test-runner
Main-Is: test-runner.hs
- GHC-Options: -Wall
+ GHC-Options: -Wall -threaded
Build-Depends: MissingH, hslogger, directory, filepath, containers, mtl,
network, extensible-exceptions, unix-compat, bytestring,
base >= 4.5, base < 5, IfElse, text, process,
diff --git a/test-runner.hs b/test-runner.hs
index eb40504..cbb37bc 100644
--- a/test-runner.hs
+++ b/test-runner.hs
@@ -91,7 +91,7 @@ runTest settings damage = withTmpDir "tmprepo" $ \tmpdir -> do
case repairstatus of
Just True -> TestResult damage repairstatus
. Just . not . Git.Fsck.foundBroken
- <$> Git.Fsck.findBroken False g
+ <$> Git.Fsck.findBroken False False g
_ -> return $ TestResult damage repairstatus Nothing
data TestResult = TestResult