summaryrefslogtreecommitdiff
path: root/Git/CatFile.hs
diff options
context:
space:
mode:
authorJoey Hess <joeyh@joeyh.name>2021-01-11 21:52:32 -0400
committerJoey Hess <joeyh@joeyh.name>2021-01-11 21:52:32 -0400
commitad48349741384ed0e49fab9cf13ac7f90aba0dd1 (patch)
tree6b8c894ce1057d069f89e7209c266f00ea43ec66 /Git/CatFile.hs
parentb3e72e94efbce652f25fb99d6c6ace8beb2a52d4 (diff)
downloadgit-repair-ad48349741384ed0e49fab9cf13ac7f90aba0dd1.tar.gz
Merge from git-annex.
Diffstat (limited to 'Git/CatFile.hs')
-rw-r--r--Git/CatFile.hs181
1 files changed, 160 insertions, 21 deletions
diff --git a/Git/CatFile.hs b/Git/CatFile.hs
index 1769e57..6bea8c0 100644
--- a/Git/CatFile.hs
+++ b/Git/CatFile.hs
@@ -6,6 +6,7 @@
-}
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE BangPatterns #-}
module Git.CatFile (
CatFileHandle,
@@ -19,6 +20,9 @@ module Git.CatFile (
catObject,
catObjectDetails,
catObjectMetaData,
+ catObjectStreamLsTree,
+ catObjectStream,
+ catObjectMetaDataStream,
) where
import System.IO
@@ -27,12 +31,15 @@ import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Char8 as S8
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
-import qualified Data.Map as M
+import qualified Data.Map.Strict as M
import Data.String
import Data.Char
import Numeric
import System.Posix.Types
import Text.Read
+import Control.Concurrent.Async
+import Control.Concurrent.Chan
+import Control.Monad.IO.Class (MonadIO)
import Common
import Git
@@ -40,9 +47,10 @@ import Git.Sha
import qualified Git.Ref
import Git.Command
import Git.Types
-import Git.FilePath
import Git.HashObject
+import qualified Git.LsTree as LsTree
import qualified Utility.CoProcess as CoProcess
+import qualified Git.BuildVersion as BuildVersion
import Utility.Tuple
data CatFileHandle = CatFileHandle
@@ -57,7 +65,7 @@ catFileStart = catFileStart' True
catFileStart' :: Bool -> Repo -> IO CatFileHandle
catFileStart' restartable repo = CatFileHandle
<$> startp "--batch"
- <*> startp "--batch-check=%(objectname) %(objecttype) %(objectsize)"
+ <*> startp ("--batch-check=" ++ batchFormat)
<*> pure repo
where
startp p = gitCoProcessStart restartable
@@ -65,6 +73,9 @@ catFileStart' restartable repo = CatFileHandle
, Param p
] repo
+batchFormat :: String
+batchFormat = "%(objectname) %(objecttype) %(objectsize)"
+
catFileStop :: CatFileHandle -> IO ()
catFileStop h = do
CoProcess.stop (catFileProcess h)
@@ -72,12 +83,12 @@ catFileStop h = do
{- Reads a file from a specified branch. -}
catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString
-catFile h branch file = catObject h $ Ref $
- fromRef' branch <> ":" <> toInternalGitPath file
+catFile h branch file = catObject h $
+ Git.Ref.branchFileRef branch file
catFileDetails :: CatFileHandle -> Branch -> RawFilePath -> IO (Maybe (L.ByteString, Sha, ObjectType))
-catFileDetails h branch file = catObjectDetails h $ Ref $
- fromRef' branch <> ":" <> toInternalGitPath file
+catFileDetails h branch file = catObjectDetails h $
+ Git.Ref.branchFileRef branch file
{- Uses a running git cat-file read the content of an object.
- Objects that do not exist will have "" returned. -}
@@ -88,18 +99,12 @@ catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, Object
catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \from -> do
header <- S8.hGetLine from
case parseResp object header of
- Just (ParsedResp sha objtype size) -> do
- content <- S.hGet from (fromIntegral size)
- eatchar '\n' from
- return $ Just (L.fromChunks [content], sha, objtype)
+ Just r@(ParsedResp sha objtype _size) -> do
+ content <- readObjectContent from r
+ return $ Just (content, sha, objtype)
Just DNE -> return Nothing
Nothing -> error $ "unknown response from git cat-file " ++ show (header, object)
where
- eatchar expected from = do
- c <- hGetChar from
- when (c /= expected) $
- error $ "missing " ++ (show expected) ++ " from git cat-file"
-
-- Slow fallback path for filenames containing newlines.
newlinefallback = queryObjectType object (gitRepo h) >>= \case
Nothing -> return Nothing
@@ -113,6 +118,18 @@ catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \f
(gitRepo h)
return (Just (content, sha, objtype))
+readObjectContent :: Handle -> ParsedResp -> IO L.ByteString
+readObjectContent h (ParsedResp _ _ size) = do
+ content <- S.hGet h (fromIntegral size)
+ eatchar '\n'
+ return (L.fromChunks [content])
+ where
+ eatchar expected = do
+ c <- hGetChar h
+ when (c /= expected) $
+ error $ "missing " ++ (show expected) ++ " from git cat-file"
+readObjectContent _ DNE = error "internal"
+
{- Gets the size and type of an object, without reading its content. -}
catObjectMetaData :: CatFileHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType))
catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $ \from -> do
@@ -180,14 +197,16 @@ querySingle o r repo reader = assertLocal repo $
, std_in = Inherit
, std_out = CreatePipe
}
- pid <- createProcess p'
- let h = stdoutHandle pid
- output <- reader h
- hClose h
- ifM (checkSuccessProcess (processHandle pid))
+ withCreateProcess p' go
+ where
+ go _ (Just outh) _ pid = do
+ output <- reader outh
+ hClose outh
+ ifM (checkSuccessProcess pid)
( return (Just output)
, return Nothing
)
+ go _ _ _ _ = error "internal"
querySize :: Ref -> Repo -> IO (Maybe FileSize)
querySize r repo = maybe Nothing (readMaybe . takeWhile (/= '\n'))
@@ -264,3 +283,123 @@ parseCommit b = Commit
sp = fromIntegral (ord ' ')
lt = fromIntegral (ord '<')
gt = fromIntegral (ord '>')
+
+{- Uses cat-file to stream the contents of the files as efficiently
+ - as possible. This is much faster than querying it repeatedly per file.
+ -}
+catObjectStreamLsTree
+ :: (MonadMask m, MonadIO m)
+ => [LsTree.TreeItem]
+ -> (LsTree.TreeItem -> Maybe v)
+ -> Repo
+ -> (IO (Maybe (v, Maybe L.ByteString)) -> m a)
+ -> m a
+catObjectStreamLsTree l want repo reader = withCatFileStream False repo $
+ \c hin hout -> bracketIO
+ (async $ feeder c hin)
+ cancel
+ (const (reader (catObjectReader readObjectContent c hout)))
+ where
+ feeder c h = do
+ forM_ l $ \ti -> case want ti of
+ Nothing -> return ()
+ Just v -> do
+ let sha = LsTree.sha ti
+ liftIO $ writeChan c (sha, v)
+ S8.hPutStrLn h (fromRef' sha)
+ hClose h
+
+catObjectStream
+ :: (MonadMask m, MonadIO m)
+ => Repo
+ -> (
+ ((v, Ref) -> IO ()) -- ^ call to feed values in
+ -> IO () -- call once all values are fed in
+ -> IO (Maybe (v, Maybe L.ByteString)) -- call to read results
+ -> m a
+ )
+ -> m a
+catObjectStream repo a = withCatFileStream False repo go
+ where
+ go c hin hout = a
+ (feeder c hin)
+ (hClose hin)
+ (catObjectReader readObjectContent c hout)
+ feeder c h (v, ref) = do
+ liftIO $ writeChan c (ref, v)
+ S8.hPutStrLn h (fromRef' ref)
+
+catObjectMetaDataStream
+ :: (MonadMask m, MonadIO m)
+ => Repo
+ -> (
+ ((v, Ref) -> IO ()) -- ^ call to feed values in
+ -> IO () -- call once all values are fed in
+ -> IO (Maybe (v, Maybe (Sha, FileSize, ObjectType))) -- call to read results
+ -> m a
+ )
+ -> m a
+catObjectMetaDataStream repo a = withCatFileStream True repo go
+ where
+ go c hin hout = a
+ (feeder c hin)
+ (hClose hin)
+ (catObjectReader (\_h r -> pure (conv r)) c hout)
+
+ feeder c h (v, ref) = do
+ liftIO $ writeChan c (ref, v)
+ S8.hPutStrLn h (fromRef' ref)
+
+ conv (ParsedResp sha ty sz) = (sha, sz, ty)
+ conv DNE = error "internal"
+
+catObjectReader
+ :: (Handle -> ParsedResp -> IO t)
+ -> Chan (Ref, a)
+ -> Handle
+ -> IO (Maybe (a, Maybe t))
+catObjectReader getv c h = ifM (hIsEOF h)
+ ( return Nothing
+ , do
+ (ref, f) <- liftIO $ readChan c
+ resp <- S8.hGetLine h
+ case parseResp ref resp of
+ Just r@(ParsedResp {}) -> do
+ v <- getv h r
+ return (Just (f, Just v))
+ Just DNE -> return (Just (f, Nothing))
+ Nothing -> error $ "unknown response from git cat-file " ++ show resp
+ )
+
+withCatFileStream
+ :: (MonadMask m, MonadIO m)
+ => Bool
+ -> Repo
+ -> (Chan v -> Handle -> Handle -> m a)
+ -> m a
+withCatFileStream check repo reader = assertLocal repo $
+ bracketIO start stop $ \(c, hin, hout, _) -> reader c hin hout
+ where
+ params = catMaybes
+ [ Just $ Param "cat-file"
+ , Just $ Param ("--batch" ++ (if check then "-check" else "") ++ "=" ++ batchFormat)
+ -- This option makes it faster, but is not present in
+ -- older versions of git.
+ , if BuildVersion.older "2.4.3"
+ then Nothing
+ else Just $ Param "--buffer"
+ ]
+
+ start = do
+ let p = gitCreateProcess params repo
+ (Just hin, Just hout, _, pid) <- createProcess p
+ { std_in = CreatePipe
+ , std_out = CreatePipe
+ }
+ c <- newChan
+ return (c, hin, hout, pid)
+
+ stop (_, hin, hout, pid) = do
+ hClose hin
+ hClose hout
+ void $ checkSuccessProcess pid