diff options
Diffstat (limited to 'Git/CatFile.hs')
-rw-r--r-- | Git/CatFile.hs | 321 |
1 files changed, 242 insertions, 79 deletions
diff --git a/Git/CatFile.hs b/Git/CatFile.hs index 6402001..daa41ad 100644 --- a/Git/CatFile.hs +++ b/Git/CatFile.hs @@ -1,15 +1,22 @@ {- git cat-file interface - - - Copyright 2011-2019 Joey Hess <id@joeyh.name> + - Copyright 2011-2021 Joey Hess <id@joeyh.name> - - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE BangPatterns #-} + module Git.CatFile ( CatFileHandle, + CatFileMetaDataHandle, catFileStart, + catFileMetaDataStart, catFileStart', + catFileMetaDataStart', catFileStop, + catFileMetaDataStop, catFile, catFileDetails, catTree, @@ -17,18 +24,26 @@ module Git.CatFile ( catObject, catObjectDetails, catObjectMetaData, + catObjectStreamLsTree, + catObjectStream, + catObjectMetaDataStream, ) where import System.IO import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L -import qualified Data.ByteString.Lazy.Char8 as L8 -import qualified Data.Map as M +import qualified Data.ByteString.Char8 as S8 +import qualified Data.Attoparsec.ByteString as A +import qualified Data.Attoparsec.ByteString.Char8 as A8 +import qualified Data.Map.Strict as M import Data.String import Data.Char import Numeric import System.Posix.Types import Text.Read +import Control.Concurrent.Async +import Control.Concurrent.Chan +import Control.Monad.IO.Class (MonadIO) import Common import Git @@ -36,15 +51,20 @@ import Git.Sha import qualified Git.Ref import Git.Command import Git.Types -import Git.FilePath import Git.HashObject +import qualified Git.LsTree as LsTree import qualified Utility.CoProcess as CoProcess +import qualified Git.BuildVersion as BuildVersion import Utility.Tuple data CatFileHandle = CatFileHandle { catFileProcess :: CoProcess.CoProcessHandle - , checkFileProcess :: CoProcess.CoProcessHandle - , gitRepo :: Repo + , catFileGitRepo :: Repo + } + +data CatFileMetaDataHandle = CatFileMetaDataHandle + { checkFileProcess :: CoProcess.CoProcessHandle + , checkFileGitRepo :: Repo } catFileStart :: Repo -> IO CatFileHandle @@ -52,28 +72,40 @@ catFileStart = catFileStart' True catFileStart' :: Bool -> Repo -> IO CatFileHandle catFileStart' restartable repo = CatFileHandle - <$> startp "--batch" - <*> startp "--batch-check=%(objectname) %(objecttype) %(objectsize)" + <$> startcat restartable repo "--batch" <*> pure repo - where - startp p = gitCoProcessStart restartable - [ Param "cat-file" - , Param p - ] repo + +catFileMetaDataStart :: Repo -> IO CatFileMetaDataHandle +catFileMetaDataStart = catFileMetaDataStart' True + +catFileMetaDataStart' :: Bool -> Repo -> IO CatFileMetaDataHandle +catFileMetaDataStart' restartable repo = CatFileMetaDataHandle + <$> startcat restartable repo ("--batch-check=" ++ batchFormat) + <*> pure repo + +batchFormat :: String +batchFormat = "%(objectname) %(objecttype) %(objectsize)" + +startcat :: Bool -> Repo -> String -> IO CoProcess.CoProcessHandle +startcat restartable repo p = gitCoProcessStart restartable + [ Param "cat-file" + , Param p + ] repo catFileStop :: CatFileHandle -> IO () -catFileStop h = do - CoProcess.stop (catFileProcess h) - CoProcess.stop (checkFileProcess h) +catFileStop = CoProcess.stop . catFileProcess + +catFileMetaDataStop :: CatFileMetaDataHandle -> IO () +catFileMetaDataStop = CoProcess.stop . checkFileProcess {- Reads a file from a specified branch. -} catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString -catFile h branch file = catObject h $ Ref $ - fromRef branch ++ ":" ++ fromRawFilePath (toInternalGitPath file) +catFile h branch file = catObject h $ + Git.Ref.branchFileRef branch file catFileDetails :: CatFileHandle -> Branch -> RawFilePath -> IO (Maybe (L.ByteString, Sha, ObjectType)) -catFileDetails h branch file = catObjectDetails h $ Ref $ - fromRef branch ++ ":" ++ fromRawFilePath (toInternalGitPath file) +catFileDetails h branch file = catObjectDetails h $ + Git.Ref.branchFileRef branch file {- Uses a running git cat-file read the content of an object. - Objects that do not exist will have "" returned. -} @@ -82,80 +114,89 @@ catObject h object = maybe L.empty fst3 <$> catObjectDetails h object catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, ObjectType)) catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \from -> do - header <- hGetLine from + header <- S8.hGetLine from case parseResp object header of - Just (ParsedResp sha size objtype) -> do - content <- S.hGet from (fromIntegral size) - eatchar '\n' from - return $ Just (L.fromChunks [content], sha, objtype) + Just r@(ParsedResp sha objtype _size) -> do + content <- readObjectContent from r + return $ Just (content, sha, objtype) Just DNE -> return Nothing - Nothing -> error $ "unknown response from git cat-file " ++ show (header, object) + Nothing -> giveup $ "unknown response from git cat-file " ++ show (header, object) where - eatchar expected from = do - c <- hGetChar from - when (c /= expected) $ - error $ "missing " ++ (show expected) ++ " from git cat-file" - -- Slow fallback path for filenames containing newlines. - newlinefallback = queryObjectType object (gitRepo h) >>= \case + newlinefallback = queryObjectType object (catFileGitRepo h) >>= \case Nothing -> return Nothing - Just objtype -> queryContent object (gitRepo h) >>= \case + Just objtype -> queryContent object (catFileGitRepo h) >>= \case Nothing -> return Nothing Just content -> do -- only the --batch interface allows getting -- the sha, so have to re-hash the object sha <- hashObject' objtype (flip L.hPut content) - (gitRepo h) + (catFileGitRepo h) return (Just (content, sha, objtype)) +readObjectContent :: Handle -> ParsedResp -> IO L.ByteString +readObjectContent h (ParsedResp _ _ size) = do + content <- S.hGet h (fromIntegral size) + eatchar '\n' + return (L.fromChunks [content]) + where + eatchar expected = do + c <- hGetChar h + when (c /= expected) $ + giveup $ "missing " ++ (show expected) ++ " from git cat-file" +readObjectContent _ DNE = error "internal" + {- Gets the size and type of an object, without reading its content. -} -catObjectMetaData :: CatFileHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType)) +catObjectMetaData :: CatFileMetaDataHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType)) catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $ \from -> do - resp <- hGetLine from + resp <- S8.hGetLine from case parseResp object resp of - Just (ParsedResp sha size objtype) -> + Just (ParsedResp sha objtype size) -> return $ Just (sha, size, objtype) Just DNE -> return Nothing Nothing -> error $ "unknown response from git cat-file " ++ show (resp, object) where -- Slow fallback path for filenames containing newlines. newlinefallback = do - sha <- Git.Ref.sha object (gitRepo h) - sz <- querySize object (gitRepo h) - objtype <- queryObjectType object (gitRepo h) + sha <- Git.Ref.sha object (checkFileGitRepo h) + sz <- querySize object (checkFileGitRepo h) + objtype <- queryObjectType object (checkFileGitRepo h) return $ (,,) <$> sha <*> sz <*> objtype -data ParsedResp = ParsedResp Sha FileSize ObjectType | DNE +data ParsedResp = ParsedResp Sha ObjectType FileSize | DNE + deriving (Show) query :: CoProcess.CoProcessHandle -> Ref -> IO a -> (Handle -> IO a) -> IO a query hdl object newlinefallback receive -- git cat-file --batch uses a line based protocol, so when the -- filename itself contains a newline, have to fall back to another -- method of getting the information. - | '\n' `elem` s = newlinefallback + | '\n' `S8.elem` s = newlinefallback -- git strips carriage return from the end of a line, out of some -- misplaced desire to support windows, so also use the newline -- fallback for those. - | "\r" `isSuffixOf` s = newlinefallback + | "\r" `S8.isSuffixOf` s = newlinefallback | otherwise = CoProcess.query hdl send receive where - send to = hPutStrLn to s - s = fromRef object - -parseResp :: Ref -> String -> Maybe ParsedResp -parseResp object l - | " missing" `isSuffixOf` l -- less expensive than full check - && l == fromRef object ++ " missing" = Just DNE - | otherwise = case words l of - [sha, objtype, size] - | length sha == shaSize -> - case (readObjectType (encodeBS objtype), reads size) of - (Just t, [(bytes, "")]) -> - Just $ ParsedResp (Ref sha) bytes t - _ -> Nothing - | otherwise -> Nothing - _ -> Nothing + send to = S8.hPutStrLn to s + s = fromRef' object + +parseResp :: Ref -> S.ByteString -> Maybe ParsedResp +parseResp object s + | " missing" `S.isSuffixOf` s -- less expensive than full check + && s == fromRef' object <> " missing" = Just DNE + | otherwise = eitherToMaybe $ A.parseOnly respParser s + +respParser :: A.Parser ParsedResp +respParser = ParsedResp + <$> (maybe (fail "bad sha") return . extractSha =<< nextword) + <* A8.char ' ' + <*> (maybe (fail "bad object type") return . readObjectType =<< nextword) + <* A8.char ' ' + <*> A8.decimal + where + nextword = A8.takeTill (== ' ') querySingle :: CommandParam -> Ref -> Repo -> (Handle -> IO a) -> IO (Maybe a) querySingle o r repo reader = assertLocal repo $ @@ -173,14 +214,16 @@ querySingle o r repo reader = assertLocal repo $ , std_in = Inherit , std_out = CreatePipe } - pid <- createProcess p' - let h = stdoutHandle pid - output <- reader h - hClose h - ifM (checkSuccessProcess (processHandle pid)) + withCreateProcess p' go + where + go _ (Just outh) _ pid = do + output <- reader outh + hClose outh + ifM (checkSuccessProcess pid) ( return (Just output) , return Nothing ) + go _ _ _ _ = error "internal" querySize :: Ref -> Repo -> IO (Maybe FileSize) querySize r repo = maybe Nothing (readMaybe . takeWhile (/= '\n')) @@ -219,41 +262,161 @@ catTree h treeref = go <$> catObjectDetails h treeref catCommit :: CatFileHandle -> Ref -> IO (Maybe Commit) catCommit h commitref = go <$> catObjectDetails h commitref where - go (Just (b, _, CommitObject)) = parseCommit b + go (Just (b, _, CommitObject)) = parseCommit (L.toStrict b) go _ = Nothing -parseCommit :: L.ByteString -> Maybe Commit +parseCommit :: S.ByteString -> Maybe Commit parseCommit b = Commit - <$> (extractSha . L8.unpack =<< field "tree") - <*> Just (maybe [] (mapMaybe (extractSha . L8.unpack)) (fields "parent")) + <$> (extractSha =<< field "tree") + <*> Just (maybe [] (mapMaybe extractSha) (fields "parent")) <*> (parsemetadata <$> field "author") <*> (parsemetadata <$> field "committer") - <*> Just (L8.unpack $ L.intercalate (L.singleton nl) message) + <*> Just (decodeBS $ S.intercalate (S.singleton nl) message) where field n = headMaybe =<< fields n fields n = M.lookup (fromString n) fieldmap fieldmap = M.fromListWith (++) ((map breakfield) header) breakfield l = - let (k, sp_v) = L.break (== sp) l - in (k, [L.drop 1 sp_v]) - (header, message) = separate L.null ls - ls = L.split nl b + let (k, sp_v) = S.break (== sp) l + in (k, [S.drop 1 sp_v]) + (header, message) = separate S.null ls + ls = S.split nl b -- author and committer lines have the form: "name <email> date" -- The email is always present, even if empty "<>" parsemetadata l = CommitMetaData - { commitName = whenset $ L.init name_sp + { commitName = whenset $ S.init name_sp , commitEmail = whenset email - , commitDate = whenset $ L.drop 2 gt_sp_date + , commitDate = whenset $ S.drop 2 gt_sp_date } where - (name_sp, rest) = L.break (== lt) l - (email, gt_sp_date) = L.break (== gt) (L.drop 1 rest) + (name_sp, rest) = S.break (== lt) l + (email, gt_sp_date) = S.break (== gt) (S.drop 1 rest) whenset v - | L.null v = Nothing - | otherwise = Just (L8.unpack v) + | S.null v = Nothing + | otherwise = Just (decodeBS v) nl = fromIntegral (ord '\n') sp = fromIntegral (ord ' ') lt = fromIntegral (ord '<') gt = fromIntegral (ord '>') + +{- Uses cat-file to stream the contents of the files as efficiently + - as possible. This is much faster than querying it repeatedly per file. + -} +catObjectStreamLsTree + :: (MonadMask m, MonadIO m) + => [LsTree.TreeItem] + -> (LsTree.TreeItem -> Maybe v) + -> Repo + -> (IO (Maybe (v, Maybe L.ByteString)) -> m a) + -> m a +catObjectStreamLsTree l want repo reader = withCatFileStream False repo $ + \c hin hout -> bracketIO + (async $ feeder c hin) + cancel + (const (reader (catObjectReader readObjectContent c hout))) + where + feeder c h = do + forM_ l $ \ti -> case want ti of + Nothing -> return () + Just v -> do + let sha = LsTree.sha ti + liftIO $ writeChan c (sha, v) + S8.hPutStrLn h (fromRef' sha) + hClose h + +catObjectStream + :: (MonadMask m, MonadIO m) + => Repo + -> ( + ((v, Ref) -> IO ()) -- ^ call to feed values in + -> IO () -- call once all values are fed in + -> IO (Maybe (v, Maybe L.ByteString)) -- call to read results + -> m a + ) + -> m a +catObjectStream repo a = withCatFileStream False repo go + where + go c hin hout = a + (feeder c hin) + (hClose hin) + (catObjectReader readObjectContent c hout) + feeder c h (v, ref) = do + writeChan c (ref, v) + S8.hPutStrLn h (fromRef' ref) + +catObjectMetaDataStream + :: (MonadMask m, MonadIO m) + => Repo + -> ( + ((v, Ref) -> IO ()) -- ^ call to feed values in + -> IO () -- call once all values are fed in + -> IO (Maybe (v, Maybe (Sha, FileSize, ObjectType))) -- call to read results + -> m a + ) + -> m a +catObjectMetaDataStream repo a = withCatFileStream True repo go + where + go c hin hout = a + (feeder c hin) + (hClose hin) + (catObjectReader (\_h r -> pure (conv r)) c hout) + + feeder c h (v, ref) = do + liftIO $ writeChan c (ref, v) + S8.hPutStrLn h (fromRef' ref) + + conv (ParsedResp sha ty sz) = (sha, sz, ty) + conv DNE = error "internal" + +catObjectReader + :: (Handle -> ParsedResp -> IO t) + -> Chan (Ref, a) + -> Handle + -> IO (Maybe (a, Maybe t)) +catObjectReader getv c h = ifM (hIsEOF h) + ( return Nothing + , do + (ref, f) <- liftIO $ readChan c + resp <- S8.hGetLine h + case parseResp ref resp of + Just r@(ParsedResp {}) -> do + v <- getv h r + return (Just (f, Just v)) + Just DNE -> return (Just (f, Nothing)) + Nothing -> error $ "unknown response from git cat-file " ++ show resp + ) + +withCatFileStream + :: (MonadMask m, MonadIO m) + => Bool + -> Repo + -> (Chan v -> Handle -> Handle -> m a) + -> m a +withCatFileStream check repo reader = assertLocal repo $ + bracketIO start stop $ \(c, hin, hout, _) -> reader c hin hout + where + params = catMaybes + [ Just $ Param "cat-file" + , Just $ Param ("--batch" ++ (if check then "-check" else "") ++ "=" ++ batchFormat) + -- This option makes it faster, but is not present in + -- older versions of git. + , if BuildVersion.older "2.4.3" + then Nothing + else Just $ Param "--buffer" + ] + + start = do + let p = gitCreateProcess params repo + (Just hin, Just hout, _, pid) <- createProcess p + { std_in = CreatePipe + , std_out = CreatePipe + } + c <- newChan + return (c, hin, hout, pid) + + stop (_, hin, hout, pid) = do + hClose hin + hClose hout + void $ checkSuccessProcess pid |