summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Git/Fsck.hs58
-rw-r--r--Git/Repair.hs100
-rw-r--r--Utility/Tmp.hs2
-rw-r--r--debian/changelog2
-rw-r--r--git-repair.cabal4
-rw-r--r--test-runner.hs2
6 files changed, 109 insertions, 59 deletions
diff --git a/Git/Fsck.hs b/Git/Fsck.hs
index 2c94230..16b0235 100644
--- a/Git/Fsck.hs
+++ b/Git/Fsck.hs
@@ -21,6 +21,8 @@ import Git.CatFile
import Utility.Batch
import qualified Data.Set as S
+import System.Process (std_err, std_out)
+import Control.Concurrent
type MissingObjects = S.Set Sha
@@ -36,17 +38,47 @@ type FsckResults = Maybe MissingObjects
- look for anything in its output (both stdout and stderr) that appears
- to be a git sha. Not all such shas are of broken objects, so ask git
- to try to cat the object, and see if it fails.
+ -
+ - Some forms of corruption will crash fsck in ways that does not let it
+ - output the sha of the broken object. In such a case, Nothing will be
+ - returned. In this case, to find broken objects, re-run with tryharder
+ - set to True. This makes fsck run in verbose mode, so it prints out
+ - shas before checking them. We assume that the last sha is the one that
+ - it crashed on, and it may have crashed following from eg, a commit to a
+ - tree to a subtree. So, run git show on the sha, and examin the stderr
+ - to find an actual bad sha.
-}
-findBroken :: Bool -> Repo -> IO FsckResults
-findBroken batchmode r = do
+findBroken :: Bool -> Bool -> Repo -> IO FsckResults
+findBroken batchmode tryharder r = do
(output, fsckok) <- processTranscript command' (toCommand params') Nothing
- let objs = parseFsckOutput output
- badobjs <- findMissing objs r
- if S.null badobjs && not fsckok
- then return Nothing
- else return $ Just badobjs
+ let objs = findShas output
+ if fsckok || not tryharder
+ then do
+ badobjs <- findMissing objs r
+ if S.null badobjs && not fsckok
+ then return Nothing
+ else return $ Just badobjs
+ else case lastMaybe objs of
+ Nothing -> return Nothing
+ Just o -> do
+ p@(_, _, _, pid) <- createProcess $
+ ( proc "git" $ toCommand $ gitCommandLine [ Param "show", Param $ show o ] r )
+ { std_err = CreatePipe
+ , std_out = CreatePipe
+ }
+ void $ forkIO $ void $ hGetContents (stdoutHandle p)
+ objs' <- findShas <$>
+ hGetContentsStrict (stderrHandle p)
+ badobjs <- findMissing objs' r
+ ifM (checkSuccessProcess pid)
+ ( if S.null badobjs
+ then return Nothing
+ else return $ Just badobjs
+ , return $ Just $ S.singleton o
+ )
+
where
- (command, params) = ("git", fsckParams r)
+ (command, params) = ("git", fsckParams tryharder r)
(command', params')
| batchmode = toBatchCommand (command, params)
| otherwise = (command, params)
@@ -76,12 +108,12 @@ findMissing objs r = go objs [] =<< start
Right True -> go os (o:c) h
Right False -> go os c h
-parseFsckOutput :: String -> [Sha]
-parseFsckOutput = catMaybes . map extractSha . concat . map words . lines
+findShas :: String -> [Sha]
+findShas = catMaybes . map extractSha . concat . map words . lines
-fsckParams :: Repo -> [CommandParam]
-fsckParams = gitCommandLine
+fsckParams :: Bool -> Repo -> [CommandParam]
+fsckParams verbose = gitCommandLine $
[ Param "fsck"
, Param "--no-dangling"
, Param "--no-reflogs"
- ]
+ ] ++ if verbose then [ Param "--verbose" ] else []
diff --git a/Git/Repair.hs b/Git/Repair.hs
index 8b1b8ab..c650958 100644
--- a/Git/Repair.hs
+++ b/Git/Repair.hs
@@ -54,17 +54,15 @@ import Data.Tuple.Utils
- To remove corrupt objects, unpack all packs, and remove the packs
- (to handle corrupt packs), and remove loose object files.
-}
-cleanCorruptObjects :: FsckResults -> Repo -> IO MissingObjects
+cleanCorruptObjects :: FsckResults -> Repo -> IO (Maybe MissingObjects)
cleanCorruptObjects mmissing r = check mmissing
where
check Nothing = do
putStrLn "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file?"
- ifM (explodePacks r)
- ( retry S.empty
- , return S.empty
- )
+ void $ explodePacks r
+ retry S.empty
check (Just bad)
- | S.null bad = return S.empty
+ | S.null bad = return $ Just S.empty
| otherwise = do
putStrLn $ unwords
[ "git fsck found"
@@ -75,10 +73,10 @@ cleanCorruptObjects mmissing r = check mmissing
removed <- removeLoose r bad
if exploded || removed
then retry bad
- else return bad
+ else return $ Just bad
retry oldbad = do
putStrLn "Re-running git fsck to see if it finds more problems."
- v <- findBroken False r
+ v <- findBroken False True r
case v of
Nothing -> do
hPutStrLn stderr $ unwords
@@ -86,12 +84,12 @@ cleanCorruptObjects mmissing r = check mmissing
, show (S.size oldbad)
, "corrupt objects."
]
- return S.empty
+ return Nothing
Just newbad -> do
removed <- removeLoose r newbad
let s = S.union oldbad newbad
if not removed || s == oldbad
- then return s
+ then return $ Just s
else retry s
removeLoose :: Repo -> MissingObjects -> IO Bool
@@ -129,21 +127,24 @@ explodePacks r = do
{- Try to retrieve a set of missing objects, from the remotes of a
- repository. Returns any that could not be retreived.
+ -
+ - Can also be run with Nothing, if it's not known which objects are
+ - missing, just that some are. (Ie, fsck failed badly.)
-
- If another clone of the repository exists locally, which might not be a
- remote of the repo being repaired, its path can be passed as a reference
- repository.
-}
-retrieveMissingObjects :: MissingObjects -> Maybe FilePath -> Repo -> IO MissingObjects
+retrieveMissingObjects :: Maybe MissingObjects -> Maybe FilePath -> Repo -> IO (Maybe MissingObjects)
retrieveMissingObjects missing referencerepo r
- | S.null missing = return missing
+ | missing == Just S.empty = return $ Just S.empty
| otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
unlessM (boolSystem "git" [Params "init", File tmpdir]) $
error $ "failed to create temp repository in " ++ tmpdir
tmpr <- Config.read =<< Construct.fromAbsPath tmpdir
stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing
- if S.null stillmissing
- then return stillmissing
+ if stillmissing == Just S.empty
+ then return $ Just S.empty
else pullremotes tmpr (remotes r) fetchallrefs stillmissing
where
pullremotes tmpr [] fetchrefs stillmissing = case referencerepo of
@@ -151,25 +152,30 @@ retrieveMissingObjects missing referencerepo r
Just p -> ifM (fetchfrom p fetchrefs tmpr)
( do
void $ copyObjects tmpr r
- findMissing (S.toList stillmissing) r
+ case stillmissing of
+ Nothing -> return $ Just S.empty
+ Just s -> Just <$> findMissing (S.toList s) r
, return stillmissing
)
- pullremotes tmpr (rmt:rmts) fetchrefs s
- | S.null s = return s
+ pullremotes tmpr (rmt:rmts) fetchrefs ms
+ | ms == Just S.empty = return $ Just S.empty
| otherwise = do
putStrLn $ "Trying to recover missing objects from remote " ++ repoDescribe rmt
ifM (fetchfrom (repoLocation rmt) fetchrefs tmpr)
( do
void $ copyObjects tmpr r
- stillmissing <- findMissing (S.toList s) r
- pullremotes tmpr rmts fetchrefs stillmissing
+ case ms of
+ Nothing -> pullremotes tmpr rmts fetchrefs ms
+ Just s -> do
+ stillmissing <- findMissing (S.toList s) r
+ pullremotes tmpr rmts fetchrefs (Just stillmissing)
, do
putStrLn $ unwords
[ "failed to fetch from remote"
, repoDescribe rmt
, "(will continue without it, but making this remote available may improve recovery)"
]
- pullremotes tmpr rmts fetchrefs s
+ pullremotes tmpr rmts fetchrefs ms
)
fetchfrom fetchurl ps = runBool $
[ Param "fetch"
@@ -468,7 +474,7 @@ runRepair :: Bool -> Repo -> IO (Bool, MissingObjects, [Branch])
runRepair forced g = do
preRepair g
putStrLn "Running git fsck ..."
- fsckresult <- findBroken False g
+ fsckresult <- findBroken False False g
if foundBroken fsckresult
then runRepairOf fsckresult forced Nothing g
else do
@@ -482,25 +488,36 @@ runRepairOf :: FsckResults -> Bool -> Maybe FilePath -> Repo -> IO (Bool, Missin
runRepairOf fsckresult forced referencerepo g = do
missing <- cleanCorruptObjects fsckresult g
stillmissing <- retrieveMissingObjects missing referencerepo g
- if S.null stillmissing
- then if repoIsLocalBare g
- then successfulfinish stillmissing []
- else ifM (checkIndex stillmissing g)
- ( successfulfinish stillmissing []
- , do
- putStrLn "No missing objects found, but the index file is corrupt!"
- if forced
- then corruptedindex
- else needforce stillmissing
- )
- else do
- putStrLn $ unwords
- [ show (S.size stillmissing)
- , "missing objects could not be recovered!"
- ]
+ case stillmissing of
+ Just s
+ | S.null s -> if repoIsLocalBare g
+ then successfulfinish S.empty []
+ else ifM (checkIndex S.empty g)
+ ( successfulfinish s []
+ , do
+ putStrLn "No missing objects found, but the index file is corrupt!"
+ if forced
+ then corruptedindex
+ else needforce S.empty
+ )
+ | otherwise -> do
+ putStrLn $ unwords
+ [ show (S.size s)
+ , "missing objects could not be recovered!"
+ ]
+ if forced
+ then continuerepairs s
+ else unsuccessfulfinish s
+ Nothing -> do
if forced
- then continuerepairs stillmissing
- else unsuccessfulfinish stillmissing
+ then do
+ fsckresult' <- findBroken False False g
+ case fsckresult' of
+ Nothing -> do
+ putStrLn "Unable to fully recover; cannot find missing objects."
+ return (False, S.empty, [])
+ Just stillmissing' -> continuerepairs stillmissing'
+ else unsuccessfulfinish S.empty
where
continuerepairs stillmissing = do
(remotebranches, goodcommits) <- removeTrackingBranches stillmissing emptyGoodCommits g
@@ -540,7 +557,7 @@ runRepairOf fsckresult forced referencerepo g = do
nukeIndex g
-- The corrupted index can prevent fsck from finding other
-- problems, so re-run repair.
- fsckresult' <- findBroken False g
+ fsckresult' <- findBroken False False g
result <- runRepairOf fsckresult' forced referencerepo g
putStrLn "Removed the corrupted index file. You should look at what files are present in your working tree and git add them back to the index when appropriate."
return result
@@ -548,8 +565,7 @@ runRepairOf fsckresult forced referencerepo g = do
successfulfinish stillmissing modifiedbranches = do
mapM_ putStrLn
[ "Successfully recovered repository!"
- , "You should run \"git fsck\" to make sure, but it looks like"
- , "everything was recovered ok."
+ , "You should run \"git fsck\" to make sure, but it looks like everything was recovered ok."
]
return (True, stillmissing, modifiedbranches)
unsuccessfulfinish stillmissing = do
diff --git a/Utility/Tmp.hs b/Utility/Tmp.hs
index 186cd12..3770654 100644
--- a/Utility/Tmp.hs
+++ b/Utility/Tmp.hs
@@ -62,7 +62,7 @@ withTmpDirIn :: FilePath -> Template -> (FilePath -> IO a) -> IO a
withTmpDirIn tmpdir template = bracket create remove
where
remove d = whenM (doesDirectoryExist d) $
- removeDirectoryRecursive d
+ return () -- removeDirectoryRecursive d
create = do
createDirectoryIfMissing True tmpdir
makenewdir (tmpdir </> template) (0 :: Int)
diff --git a/debian/changelog b/debian/changelog
index e34606e..db190a7 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -7,6 +7,8 @@ git-repair (1.20131119) UNRELEASED; urgency=low
and this hides other problems.
* Write a dummy .git/HEAD if the file is missing or corrupt, as
git otherwise will not treat the repository as a git repo.
+ * Improve fsck code to find badly corrupted objects that crash git fsck
+ before it can complain about them.
-- Joey Hess <joeyh@debian.org> Tue, 19 Nov 2013 17:16:56 -0400
diff --git a/git-repair.cabal b/git-repair.cabal
index a74b117..77150ee 100644
--- a/git-repair.cabal
+++ b/git-repair.cabal
@@ -19,7 +19,7 @@ Description:
Executable git-repair
Main-Is: git-repair.hs
- GHC-Options: -Wall
+ GHC-Options: -Wall -threaded
Build-Depends: MissingH, hslogger, directory, filepath, containers, mtl,
network, extensible-exceptions, unix-compat, bytestring,
base >= 4.5, base < 5, IfElse, text, process,
@@ -30,7 +30,7 @@ Executable git-repair
Executable test-runner
Main-Is: test-runner.hs
- GHC-Options: -Wall
+ GHC-Options: -Wall -threaded
Build-Depends: MissingH, hslogger, directory, filepath, containers, mtl,
network, extensible-exceptions, unix-compat, bytestring,
base >= 4.5, base < 5, IfElse, text, process,
diff --git a/test-runner.hs b/test-runner.hs
index eb40504..cbb37bc 100644
--- a/test-runner.hs
+++ b/test-runner.hs
@@ -91,7 +91,7 @@ runTest settings damage = withTmpDir "tmprepo" $ \tmpdir -> do
case repairstatus of
Just True -> TestResult damage repairstatus
. Just . not . Git.Fsck.foundBroken
- <$> Git.Fsck.findBroken False g
+ <$> Git.Fsck.findBroken False False g
_ -> return $ TestResult damage repairstatus Nothing
data TestResult = TestResult