summaryrefslogtreecommitdiff
path: root/Git/CatFile.hs
diff options
context:
space:
mode:
Diffstat (limited to 'Git/CatFile.hs')
-rw-r--r--Git/CatFile.hs321
1 files changed, 242 insertions, 79 deletions
diff --git a/Git/CatFile.hs b/Git/CatFile.hs
index 6402001..daa41ad 100644
--- a/Git/CatFile.hs
+++ b/Git/CatFile.hs
@@ -1,15 +1,22 @@
{- git cat-file interface
-
- - Copyright 2011-2019 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE BangPatterns #-}
+
module Git.CatFile (
CatFileHandle,
+ CatFileMetaDataHandle,
catFileStart,
+ catFileMetaDataStart,
catFileStart',
+ catFileMetaDataStart',
catFileStop,
+ catFileMetaDataStop,
catFile,
catFileDetails,
catTree,
@@ -17,18 +24,26 @@ module Git.CatFile (
catObject,
catObjectDetails,
catObjectMetaData,
+ catObjectStreamLsTree,
+ catObjectStream,
+ catObjectMetaDataStream,
) where
import System.IO
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
-import qualified Data.ByteString.Lazy.Char8 as L8
-import qualified Data.Map as M
+import qualified Data.ByteString.Char8 as S8
+import qualified Data.Attoparsec.ByteString as A
+import qualified Data.Attoparsec.ByteString.Char8 as A8
+import qualified Data.Map.Strict as M
import Data.String
import Data.Char
import Numeric
import System.Posix.Types
import Text.Read
+import Control.Concurrent.Async
+import Control.Concurrent.Chan
+import Control.Monad.IO.Class (MonadIO)
import Common
import Git
@@ -36,15 +51,20 @@ import Git.Sha
import qualified Git.Ref
import Git.Command
import Git.Types
-import Git.FilePath
import Git.HashObject
+import qualified Git.LsTree as LsTree
import qualified Utility.CoProcess as CoProcess
+import qualified Git.BuildVersion as BuildVersion
import Utility.Tuple
data CatFileHandle = CatFileHandle
{ catFileProcess :: CoProcess.CoProcessHandle
- , checkFileProcess :: CoProcess.CoProcessHandle
- , gitRepo :: Repo
+ , catFileGitRepo :: Repo
+ }
+
+data CatFileMetaDataHandle = CatFileMetaDataHandle
+ { checkFileProcess :: CoProcess.CoProcessHandle
+ , checkFileGitRepo :: Repo
}
catFileStart :: Repo -> IO CatFileHandle
@@ -52,28 +72,40 @@ catFileStart = catFileStart' True
catFileStart' :: Bool -> Repo -> IO CatFileHandle
catFileStart' restartable repo = CatFileHandle
- <$> startp "--batch"
- <*> startp "--batch-check=%(objectname) %(objecttype) %(objectsize)"
+ <$> startcat restartable repo "--batch"
<*> pure repo
- where
- startp p = gitCoProcessStart restartable
- [ Param "cat-file"
- , Param p
- ] repo
+
+catFileMetaDataStart :: Repo -> IO CatFileMetaDataHandle
+catFileMetaDataStart = catFileMetaDataStart' True
+
+catFileMetaDataStart' :: Bool -> Repo -> IO CatFileMetaDataHandle
+catFileMetaDataStart' restartable repo = CatFileMetaDataHandle
+ <$> startcat restartable repo ("--batch-check=" ++ batchFormat)
+ <*> pure repo
+
+batchFormat :: String
+batchFormat = "%(objectname) %(objecttype) %(objectsize)"
+
+startcat :: Bool -> Repo -> String -> IO CoProcess.CoProcessHandle
+startcat restartable repo p = gitCoProcessStart restartable
+ [ Param "cat-file"
+ , Param p
+ ] repo
catFileStop :: CatFileHandle -> IO ()
-catFileStop h = do
- CoProcess.stop (catFileProcess h)
- CoProcess.stop (checkFileProcess h)
+catFileStop = CoProcess.stop . catFileProcess
+
+catFileMetaDataStop :: CatFileMetaDataHandle -> IO ()
+catFileMetaDataStop = CoProcess.stop . checkFileProcess
{- Reads a file from a specified branch. -}
catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString
-catFile h branch file = catObject h $ Ref $
- fromRef branch ++ ":" ++ fromRawFilePath (toInternalGitPath file)
+catFile h branch file = catObject h $
+ Git.Ref.branchFileRef branch file
catFileDetails :: CatFileHandle -> Branch -> RawFilePath -> IO (Maybe (L.ByteString, Sha, ObjectType))
-catFileDetails h branch file = catObjectDetails h $ Ref $
- fromRef branch ++ ":" ++ fromRawFilePath (toInternalGitPath file)
+catFileDetails h branch file = catObjectDetails h $
+ Git.Ref.branchFileRef branch file
{- Uses a running git cat-file read the content of an object.
- Objects that do not exist will have "" returned. -}
@@ -82,80 +114,89 @@ catObject h object = maybe L.empty fst3 <$> catObjectDetails h object
catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, ObjectType))
catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \from -> do
- header <- hGetLine from
+ header <- S8.hGetLine from
case parseResp object header of
- Just (ParsedResp sha size objtype) -> do
- content <- S.hGet from (fromIntegral size)
- eatchar '\n' from
- return $ Just (L.fromChunks [content], sha, objtype)
+ Just r@(ParsedResp sha objtype _size) -> do
+ content <- readObjectContent from r
+ return $ Just (content, sha, objtype)
Just DNE -> return Nothing
- Nothing -> error $ "unknown response from git cat-file " ++ show (header, object)
+ Nothing -> giveup $ "unknown response from git cat-file " ++ show (header, object)
where
- eatchar expected from = do
- c <- hGetChar from
- when (c /= expected) $
- error $ "missing " ++ (show expected) ++ " from git cat-file"
-
-- Slow fallback path for filenames containing newlines.
- newlinefallback = queryObjectType object (gitRepo h) >>= \case
+ newlinefallback = queryObjectType object (catFileGitRepo h) >>= \case
Nothing -> return Nothing
- Just objtype -> queryContent object (gitRepo h) >>= \case
+ Just objtype -> queryContent object (catFileGitRepo h) >>= \case
Nothing -> return Nothing
Just content -> do
-- only the --batch interface allows getting
-- the sha, so have to re-hash the object
sha <- hashObject' objtype
(flip L.hPut content)
- (gitRepo h)
+ (catFileGitRepo h)
return (Just (content, sha, objtype))
+readObjectContent :: Handle -> ParsedResp -> IO L.ByteString
+readObjectContent h (ParsedResp _ _ size) = do
+ content <- S.hGet h (fromIntegral size)
+ eatchar '\n'
+ return (L.fromChunks [content])
+ where
+ eatchar expected = do
+ c <- hGetChar h
+ when (c /= expected) $
+ giveup $ "missing " ++ (show expected) ++ " from git cat-file"
+readObjectContent _ DNE = error "internal"
+
{- Gets the size and type of an object, without reading its content. -}
-catObjectMetaData :: CatFileHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType))
+catObjectMetaData :: CatFileMetaDataHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType))
catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $ \from -> do
- resp <- hGetLine from
+ resp <- S8.hGetLine from
case parseResp object resp of
- Just (ParsedResp sha size objtype) ->
+ Just (ParsedResp sha objtype size) ->
return $ Just (sha, size, objtype)
Just DNE -> return Nothing
Nothing -> error $ "unknown response from git cat-file " ++ show (resp, object)
where
-- Slow fallback path for filenames containing newlines.
newlinefallback = do
- sha <- Git.Ref.sha object (gitRepo h)
- sz <- querySize object (gitRepo h)
- objtype <- queryObjectType object (gitRepo h)
+ sha <- Git.Ref.sha object (checkFileGitRepo h)
+ sz <- querySize object (checkFileGitRepo h)
+ objtype <- queryObjectType object (checkFileGitRepo h)
return $ (,,) <$> sha <*> sz <*> objtype
-data ParsedResp = ParsedResp Sha FileSize ObjectType | DNE
+data ParsedResp = ParsedResp Sha ObjectType FileSize | DNE
+ deriving (Show)
query :: CoProcess.CoProcessHandle -> Ref -> IO a -> (Handle -> IO a) -> IO a
query hdl object newlinefallback receive
-- git cat-file --batch uses a line based protocol, so when the
-- filename itself contains a newline, have to fall back to another
-- method of getting the information.
- | '\n' `elem` s = newlinefallback
+ | '\n' `S8.elem` s = newlinefallback
-- git strips carriage return from the end of a line, out of some
-- misplaced desire to support windows, so also use the newline
-- fallback for those.
- | "\r" `isSuffixOf` s = newlinefallback
+ | "\r" `S8.isSuffixOf` s = newlinefallback
| otherwise = CoProcess.query hdl send receive
where
- send to = hPutStrLn to s
- s = fromRef object
-
-parseResp :: Ref -> String -> Maybe ParsedResp
-parseResp object l
- | " missing" `isSuffixOf` l -- less expensive than full check
- && l == fromRef object ++ " missing" = Just DNE
- | otherwise = case words l of
- [sha, objtype, size]
- | length sha == shaSize ->
- case (readObjectType (encodeBS objtype), reads size) of
- (Just t, [(bytes, "")]) ->
- Just $ ParsedResp (Ref sha) bytes t
- _ -> Nothing
- | otherwise -> Nothing
- _ -> Nothing
+ send to = S8.hPutStrLn to s
+ s = fromRef' object
+
+parseResp :: Ref -> S.ByteString -> Maybe ParsedResp
+parseResp object s
+ | " missing" `S.isSuffixOf` s -- less expensive than full check
+ && s == fromRef' object <> " missing" = Just DNE
+ | otherwise = eitherToMaybe $ A.parseOnly respParser s
+
+respParser :: A.Parser ParsedResp
+respParser = ParsedResp
+ <$> (maybe (fail "bad sha") return . extractSha =<< nextword)
+ <* A8.char ' '
+ <*> (maybe (fail "bad object type") return . readObjectType =<< nextword)
+ <* A8.char ' '
+ <*> A8.decimal
+ where
+ nextword = A8.takeTill (== ' ')
querySingle :: CommandParam -> Ref -> Repo -> (Handle -> IO a) -> IO (Maybe a)
querySingle o r repo reader = assertLocal repo $
@@ -173,14 +214,16 @@ querySingle o r repo reader = assertLocal repo $
, std_in = Inherit
, std_out = CreatePipe
}
- pid <- createProcess p'
- let h = stdoutHandle pid
- output <- reader h
- hClose h
- ifM (checkSuccessProcess (processHandle pid))
+ withCreateProcess p' go
+ where
+ go _ (Just outh) _ pid = do
+ output <- reader outh
+ hClose outh
+ ifM (checkSuccessProcess pid)
( return (Just output)
, return Nothing
)
+ go _ _ _ _ = error "internal"
querySize :: Ref -> Repo -> IO (Maybe FileSize)
querySize r repo = maybe Nothing (readMaybe . takeWhile (/= '\n'))
@@ -219,41 +262,161 @@ catTree h treeref = go <$> catObjectDetails h treeref
catCommit :: CatFileHandle -> Ref -> IO (Maybe Commit)
catCommit h commitref = go <$> catObjectDetails h commitref
where
- go (Just (b, _, CommitObject)) = parseCommit b
+ go (Just (b, _, CommitObject)) = parseCommit (L.toStrict b)
go _ = Nothing
-parseCommit :: L.ByteString -> Maybe Commit
+parseCommit :: S.ByteString -> Maybe Commit
parseCommit b = Commit
- <$> (extractSha . L8.unpack =<< field "tree")
- <*> Just (maybe [] (mapMaybe (extractSha . L8.unpack)) (fields "parent"))
+ <$> (extractSha =<< field "tree")
+ <*> Just (maybe [] (mapMaybe extractSha) (fields "parent"))
<*> (parsemetadata <$> field "author")
<*> (parsemetadata <$> field "committer")
- <*> Just (L8.unpack $ L.intercalate (L.singleton nl) message)
+ <*> Just (decodeBS $ S.intercalate (S.singleton nl) message)
where
field n = headMaybe =<< fields n
fields n = M.lookup (fromString n) fieldmap
fieldmap = M.fromListWith (++) ((map breakfield) header)
breakfield l =
- let (k, sp_v) = L.break (== sp) l
- in (k, [L.drop 1 sp_v])
- (header, message) = separate L.null ls
- ls = L.split nl b
+ let (k, sp_v) = S.break (== sp) l
+ in (k, [S.drop 1 sp_v])
+ (header, message) = separate S.null ls
+ ls = S.split nl b
-- author and committer lines have the form: "name <email> date"
-- The email is always present, even if empty "<>"
parsemetadata l = CommitMetaData
- { commitName = whenset $ L.init name_sp
+ { commitName = whenset $ S.init name_sp
, commitEmail = whenset email
- , commitDate = whenset $ L.drop 2 gt_sp_date
+ , commitDate = whenset $ S.drop 2 gt_sp_date
}
where
- (name_sp, rest) = L.break (== lt) l
- (email, gt_sp_date) = L.break (== gt) (L.drop 1 rest)
+ (name_sp, rest) = S.break (== lt) l
+ (email, gt_sp_date) = S.break (== gt) (S.drop 1 rest)
whenset v
- | L.null v = Nothing
- | otherwise = Just (L8.unpack v)
+ | S.null v = Nothing
+ | otherwise = Just (decodeBS v)
nl = fromIntegral (ord '\n')
sp = fromIntegral (ord ' ')
lt = fromIntegral (ord '<')
gt = fromIntegral (ord '>')
+
+{- Uses cat-file to stream the contents of the files as efficiently
+ - as possible. This is much faster than querying it repeatedly per file.
+ -}
+catObjectStreamLsTree
+ :: (MonadMask m, MonadIO m)
+ => [LsTree.TreeItem]
+ -> (LsTree.TreeItem -> Maybe v)
+ -> Repo
+ -> (IO (Maybe (v, Maybe L.ByteString)) -> m a)
+ -> m a
+catObjectStreamLsTree l want repo reader = withCatFileStream False repo $
+ \c hin hout -> bracketIO
+ (async $ feeder c hin)
+ cancel
+ (const (reader (catObjectReader readObjectContent c hout)))
+ where
+ feeder c h = do
+ forM_ l $ \ti -> case want ti of
+ Nothing -> return ()
+ Just v -> do
+ let sha = LsTree.sha ti
+ liftIO $ writeChan c (sha, v)
+ S8.hPutStrLn h (fromRef' sha)
+ hClose h
+
+catObjectStream
+ :: (MonadMask m, MonadIO m)
+ => Repo
+ -> (
+ ((v, Ref) -> IO ()) -- ^ call to feed values in
+ -> IO () -- call once all values are fed in
+ -> IO (Maybe (v, Maybe L.ByteString)) -- call to read results
+ -> m a
+ )
+ -> m a
+catObjectStream repo a = withCatFileStream False repo go
+ where
+ go c hin hout = a
+ (feeder c hin)
+ (hClose hin)
+ (catObjectReader readObjectContent c hout)
+ feeder c h (v, ref) = do
+ writeChan c (ref, v)
+ S8.hPutStrLn h (fromRef' ref)
+
+catObjectMetaDataStream
+ :: (MonadMask m, MonadIO m)
+ => Repo
+ -> (
+ ((v, Ref) -> IO ()) -- ^ call to feed values in
+ -> IO () -- call once all values are fed in
+ -> IO (Maybe (v, Maybe (Sha, FileSize, ObjectType))) -- call to read results
+ -> m a
+ )
+ -> m a
+catObjectMetaDataStream repo a = withCatFileStream True repo go
+ where
+ go c hin hout = a
+ (feeder c hin)
+ (hClose hin)
+ (catObjectReader (\_h r -> pure (conv r)) c hout)
+
+ feeder c h (v, ref) = do
+ liftIO $ writeChan c (ref, v)
+ S8.hPutStrLn h (fromRef' ref)
+
+ conv (ParsedResp sha ty sz) = (sha, sz, ty)
+ conv DNE = error "internal"
+
+catObjectReader
+ :: (Handle -> ParsedResp -> IO t)
+ -> Chan (Ref, a)
+ -> Handle
+ -> IO (Maybe (a, Maybe t))
+catObjectReader getv c h = ifM (hIsEOF h)
+ ( return Nothing
+ , do
+ (ref, f) <- liftIO $ readChan c
+ resp <- S8.hGetLine h
+ case parseResp ref resp of
+ Just r@(ParsedResp {}) -> do
+ v <- getv h r
+ return (Just (f, Just v))
+ Just DNE -> return (Just (f, Nothing))
+ Nothing -> error $ "unknown response from git cat-file " ++ show resp
+ )
+
+withCatFileStream
+ :: (MonadMask m, MonadIO m)
+ => Bool
+ -> Repo
+ -> (Chan v -> Handle -> Handle -> m a)
+ -> m a
+withCatFileStream check repo reader = assertLocal repo $
+ bracketIO start stop $ \(c, hin, hout, _) -> reader c hin hout
+ where
+ params = catMaybes
+ [ Just $ Param "cat-file"
+ , Just $ Param ("--batch" ++ (if check then "-check" else "") ++ "=" ++ batchFormat)
+ -- This option makes it faster, but is not present in
+ -- older versions of git.
+ , if BuildVersion.older "2.4.3"
+ then Nothing
+ else Just $ Param "--buffer"
+ ]
+
+ start = do
+ let p = gitCreateProcess params repo
+ (Just hin, Just hout, _, pid) <- createProcess p
+ { std_in = CreatePipe
+ , std_out = CreatePipe
+ }
+ c <- newChan
+ return (c, hin, hout, pid)
+
+ stop (_, hin, hout, pid) = do
+ hClose hin
+ hClose hout
+ void $ checkSuccessProcess pid