From ad48349741384ed0e49fab9cf13ac7f90aba0dd1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 11 Jan 2021 21:52:32 -0400 Subject: Merge from git-annex. --- Git/CatFile.hs | 181 ++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 160 insertions(+), 21 deletions(-) (limited to 'Git/CatFile.hs') diff --git a/Git/CatFile.hs b/Git/CatFile.hs index 1769e57..6bea8c0 100644 --- a/Git/CatFile.hs +++ b/Git/CatFile.hs @@ -6,6 +6,7 @@ -} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE BangPatterns #-} module Git.CatFile ( CatFileHandle, @@ -19,6 +20,9 @@ module Git.CatFile ( catObject, catObjectDetails, catObjectMetaData, + catObjectStreamLsTree, + catObjectStream, + catObjectMetaDataStream, ) where import System.IO @@ -27,12 +31,15 @@ import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Char8 as S8 import qualified Data.Attoparsec.ByteString as A import qualified Data.Attoparsec.ByteString.Char8 as A8 -import qualified Data.Map as M +import qualified Data.Map.Strict as M import Data.String import Data.Char import Numeric import System.Posix.Types import Text.Read +import Control.Concurrent.Async +import Control.Concurrent.Chan +import Control.Monad.IO.Class (MonadIO) import Common import Git @@ -40,9 +47,10 @@ import Git.Sha import qualified Git.Ref import Git.Command import Git.Types -import Git.FilePath import Git.HashObject +import qualified Git.LsTree as LsTree import qualified Utility.CoProcess as CoProcess +import qualified Git.BuildVersion as BuildVersion import Utility.Tuple data CatFileHandle = CatFileHandle @@ -57,7 +65,7 @@ catFileStart = catFileStart' True catFileStart' :: Bool -> Repo -> IO CatFileHandle catFileStart' restartable repo = CatFileHandle <$> startp "--batch" - <*> startp "--batch-check=%(objectname) %(objecttype) %(objectsize)" + <*> startp ("--batch-check=" ++ batchFormat) <*> pure repo where startp p = gitCoProcessStart restartable @@ -65,6 +73,9 @@ catFileStart' restartable repo = CatFileHandle , Param p ] repo +batchFormat :: String +batchFormat = "%(objectname) %(objecttype) %(objectsize)" + catFileStop :: CatFileHandle -> IO () catFileStop h = do CoProcess.stop (catFileProcess h) @@ -72,12 +83,12 @@ catFileStop h = do {- Reads a file from a specified branch. -} catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString -catFile h branch file = catObject h $ Ref $ - fromRef' branch <> ":" <> toInternalGitPath file +catFile h branch file = catObject h $ + Git.Ref.branchFileRef branch file catFileDetails :: CatFileHandle -> Branch -> RawFilePath -> IO (Maybe (L.ByteString, Sha, ObjectType)) -catFileDetails h branch file = catObjectDetails h $ Ref $ - fromRef' branch <> ":" <> toInternalGitPath file +catFileDetails h branch file = catObjectDetails h $ + Git.Ref.branchFileRef branch file {- Uses a running git cat-file read the content of an object. - Objects that do not exist will have "" returned. -} @@ -88,18 +99,12 @@ catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, Object catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \from -> do header <- S8.hGetLine from case parseResp object header of - Just (ParsedResp sha objtype size) -> do - content <- S.hGet from (fromIntegral size) - eatchar '\n' from - return $ Just (L.fromChunks [content], sha, objtype) + Just r@(ParsedResp sha objtype _size) -> do + content <- readObjectContent from r + return $ Just (content, sha, objtype) Just DNE -> return Nothing Nothing -> error $ "unknown response from git cat-file " ++ show (header, object) where - eatchar expected from = do - c <- hGetChar from - when (c /= expected) $ - error $ "missing " ++ (show expected) ++ " from git cat-file" - -- Slow fallback path for filenames containing newlines. newlinefallback = queryObjectType object (gitRepo h) >>= \case Nothing -> return Nothing @@ -113,6 +118,18 @@ catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \f (gitRepo h) return (Just (content, sha, objtype)) +readObjectContent :: Handle -> ParsedResp -> IO L.ByteString +readObjectContent h (ParsedResp _ _ size) = do + content <- S.hGet h (fromIntegral size) + eatchar '\n' + return (L.fromChunks [content]) + where + eatchar expected = do + c <- hGetChar h + when (c /= expected) $ + error $ "missing " ++ (show expected) ++ " from git cat-file" +readObjectContent _ DNE = error "internal" + {- Gets the size and type of an object, without reading its content. -} catObjectMetaData :: CatFileHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType)) catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $ \from -> do @@ -180,14 +197,16 @@ querySingle o r repo reader = assertLocal repo $ , std_in = Inherit , std_out = CreatePipe } - pid <- createProcess p' - let h = stdoutHandle pid - output <- reader h - hClose h - ifM (checkSuccessProcess (processHandle pid)) + withCreateProcess p' go + where + go _ (Just outh) _ pid = do + output <- reader outh + hClose outh + ifM (checkSuccessProcess pid) ( return (Just output) , return Nothing ) + go _ _ _ _ = error "internal" querySize :: Ref -> Repo -> IO (Maybe FileSize) querySize r repo = maybe Nothing (readMaybe . takeWhile (/= '\n')) @@ -264,3 +283,123 @@ parseCommit b = Commit sp = fromIntegral (ord ' ') lt = fromIntegral (ord '<') gt = fromIntegral (ord '>') + +{- Uses cat-file to stream the contents of the files as efficiently + - as possible. This is much faster than querying it repeatedly per file. + -} +catObjectStreamLsTree + :: (MonadMask m, MonadIO m) + => [LsTree.TreeItem] + -> (LsTree.TreeItem -> Maybe v) + -> Repo + -> (IO (Maybe (v, Maybe L.ByteString)) -> m a) + -> m a +catObjectStreamLsTree l want repo reader = withCatFileStream False repo $ + \c hin hout -> bracketIO + (async $ feeder c hin) + cancel + (const (reader (catObjectReader readObjectContent c hout))) + where + feeder c h = do + forM_ l $ \ti -> case want ti of + Nothing -> return () + Just v -> do + let sha = LsTree.sha ti + liftIO $ writeChan c (sha, v) + S8.hPutStrLn h (fromRef' sha) + hClose h + +catObjectStream + :: (MonadMask m, MonadIO m) + => Repo + -> ( + ((v, Ref) -> IO ()) -- ^ call to feed values in + -> IO () -- call once all values are fed in + -> IO (Maybe (v, Maybe L.ByteString)) -- call to read results + -> m a + ) + -> m a +catObjectStream repo a = withCatFileStream False repo go + where + go c hin hout = a + (feeder c hin) + (hClose hin) + (catObjectReader readObjectContent c hout) + feeder c h (v, ref) = do + liftIO $ writeChan c (ref, v) + S8.hPutStrLn h (fromRef' ref) + +catObjectMetaDataStream + :: (MonadMask m, MonadIO m) + => Repo + -> ( + ((v, Ref) -> IO ()) -- ^ call to feed values in + -> IO () -- call once all values are fed in + -> IO (Maybe (v, Maybe (Sha, FileSize, ObjectType))) -- call to read results + -> m a + ) + -> m a +catObjectMetaDataStream repo a = withCatFileStream True repo go + where + go c hin hout = a + (feeder c hin) + (hClose hin) + (catObjectReader (\_h r -> pure (conv r)) c hout) + + feeder c h (v, ref) = do + liftIO $ writeChan c (ref, v) + S8.hPutStrLn h (fromRef' ref) + + conv (ParsedResp sha ty sz) = (sha, sz, ty) + conv DNE = error "internal" + +catObjectReader + :: (Handle -> ParsedResp -> IO t) + -> Chan (Ref, a) + -> Handle + -> IO (Maybe (a, Maybe t)) +catObjectReader getv c h = ifM (hIsEOF h) + ( return Nothing + , do + (ref, f) <- liftIO $ readChan c + resp <- S8.hGetLine h + case parseResp ref resp of + Just r@(ParsedResp {}) -> do + v <- getv h r + return (Just (f, Just v)) + Just DNE -> return (Just (f, Nothing)) + Nothing -> error $ "unknown response from git cat-file " ++ show resp + ) + +withCatFileStream + :: (MonadMask m, MonadIO m) + => Bool + -> Repo + -> (Chan v -> Handle -> Handle -> m a) + -> m a +withCatFileStream check repo reader = assertLocal repo $ + bracketIO start stop $ \(c, hin, hout, _) -> reader c hin hout + where + params = catMaybes + [ Just $ Param "cat-file" + , Just $ Param ("--batch" ++ (if check then "-check" else "") ++ "=" ++ batchFormat) + -- This option makes it faster, but is not present in + -- older versions of git. + , if BuildVersion.older "2.4.3" + then Nothing + else Just $ Param "--buffer" + ] + + start = do + let p = gitCreateProcess params repo + (Just hin, Just hout, _, pid) <- createProcess p + { std_in = CreatePipe + , std_out = CreatePipe + } + c <- newChan + return (c, hin, hout, pid) + + stop (_, hin, hout, pid) = do + hClose hin + hClose hout + void $ checkSuccessProcess pid -- cgit v1.2.3