{- git ls-files interface - - Copyright 2010-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} {-# LANGUAGE OverloadedStrings #-} module Git.LsFiles ( Options(..), inRepo, inRepoDetails, inRepoOrBranch, notInRepo, notInRepoIncludingEmptyDirectories, allFiles, deleted, modified, staged, stagedNotDeleted, usualStageNum, mergeConflictHeadStageNum, stagedDetails, typeChanged, typeChangedStaged, Conflicting(..), Unmerged(..), unmerged, StagedDetails, inodeCaches, ) where import Common import Git import Git.Command import Git.Types import Git.Sha import Utility.InodeCache import Utility.TimeStamp import Utility.Attoparsec import qualified Utility.RawFilePath as R import System.Posix.Types import qualified Data.Map as M import qualified Data.ByteString as S import qualified Data.Attoparsec.ByteString as A import qualified Data.Attoparsec.ByteString.Char8 as A8 import qualified System.FilePath.ByteString as P {- It's only safe to use git ls-files on the current repo, not on a remote. - - Git has some strange behavior when git ls-files is used with repos - that are not the one that the cwd is in: - git --git-dir=../foo/.git --worktree=../foo ../foo fails saying - "../foo is outside repository". - That does not happen when an absolute path is provided. - - Also, the files output by ls-files are relative to the cwd. - Unless it's run on remote. Then it's relative to the top of the remote - repo. - - So, best to avoid that class of problems. -} safeForLsFiles :: Repo -> Bool safeForLsFiles r = isNothing (remoteName r) guardSafeForLsFiles :: Repo -> IO a -> IO a guardSafeForLsFiles r a | safeForLsFiles r = a | otherwise = giveup $ "git ls-files is unsafe to run on repository " ++ repoDescribe r data Options = ErrorUnmatch opParam :: Options -> CommandParam opParam ErrorUnmatch = Param "--error-unmatch" {- Lists files that are checked into git's index at the specified paths. - With no paths, all files are listed. -} inRepo :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) inRepo = inRepo' [Param "--cached"] inRepo' :: [CommandParam] -> [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) inRepo' ps os l repo = guardSafeForLsFiles repo $ pipeNullSplit' params repo where params = Param "ls-files" : Param "-z" : map opParam os ++ ps ++ (Param "--" : map (File . fromRawFilePath) l) {- Lists the same files inRepo does, but with sha and mode. -} inRepoDetails :: [Options] -> [RawFilePath] -> Repo -> IO ([(RawFilePath, Sha, FileMode)], IO Bool) inRepoDetails = stagedDetails' parser . map opParam where parser s = case parseStagedDetails s of Just (file, sha, mode, stagenum) | stagenum == usualStageNum || stagenum == mergeConflictHeadStageNum -> Just (file, sha, mode) _ -> Nothing {- Files that are checked into the index or have been committed to a - branch. -} inRepoOrBranch :: Branch -> [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) inRepoOrBranch b = inRepo' [ Param "--cached" , Param ("--with-tree=" ++ fromRef b) ] {- Scans for files at the specified locations that are not checked into git. -} notInRepo :: [Options] -> Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) notInRepo = notInRepo' [] notInRepo' :: [CommandParam] -> [Options] -> Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) notInRepo' ps os include_ignored = inRepo' (Param "--others" : ps ++ exclude) os where exclude | include_ignored = [] | otherwise = [Param "--exclude-standard"] {- Scans for files at the specified locations that are not checked into - git. Empty directories are included in the result. -} notInRepoIncludingEmptyDirectories :: [Options] -> Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) notInRepoIncludingEmptyDirectories = notInRepo' [Param "--directory"] {- Finds all files in the specified locations, whether checked into git or - not. -} allFiles :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) allFiles = inRepo' [Param "--cached", Param "--others"] {- Returns a list of files in the specified locations that have been - deleted. -} deleted :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) deleted = inRepo' [Param "--deleted"] {- Returns a list of files in the specified locations that have been - modified. -} modified :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) modified = inRepo' [Param "--modified"] {- Returns a list of all files that are staged for commit. -} staged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) staged = staged' [] {- Returns a list of the files, staged for commit, that are being added, - moved, or changed (but not deleted), from the specified locations. -} stagedNotDeleted :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) stagedNotDeleted = staged' [Param "--diff-filter=ACMRT"] staged' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) staged' ps l repo = guardSafeForLsFiles repo $ pipeNullSplit' (prefix ++ ps ++ suffix) repo where prefix = [Param "diff", Param "--cached", Param "--name-only", Param "-z"] suffix = Param "--" : map (File . fromRawFilePath) l type StagedDetails = (RawFilePath, Sha, FileMode, StageNum) type StageNum = Int {- Used when not in a merge conflict. -} usualStageNum :: Int usualStageNum = 0 {- WHen in a merge conflict, git uses stage number 2 for the local HEAD - side of the merge conflict. -} mergeConflictHeadStageNum :: Int mergeConflictHeadStageNum = 2 {- Returns details about all files that are staged in the index. - - Note that, during a conflict, a file will appear in the list - more than once with different stage numbers. -} stagedDetails :: [RawFilePath] -> Repo -> IO ([StagedDetails], IO Bool) stagedDetails = stagedDetails' parseStagedDetails [] stagedDetails' :: (S.ByteString -> Maybe t) -> [CommandParam] -> [RawFilePath] -> Repo -> IO ([t], IO Bool) stagedDetails' parser ps l repo = guardSafeForLsFiles repo $ do (ls, cleanup) <- pipeNullSplit' params repo return (mapMaybe parser ls, cleanup) where params = Param "ls-files" : Param "--stage" : Param "-z" : ps ++ Param "--" : map (File . fromRawFilePath) l parseStagedDetails :: S.ByteString -> Maybe StagedDetails parseStagedDetails = eitherToMaybe . A.parseOnly parser where parser = do mode <- octal void $ A8.char ' ' sha <- maybe (fail "bad sha") return . extractSha =<< nextword void $ A8.char ' ' stagenum <- A8.decimal void $ A8.char '\t' file <- A.takeByteString return (file, sha, mode, stagenum) nextword = A8.takeTill (== ' ') {- Returns a list of the files in the specified locations that are staged - for commit, and whose type has changed. -} typeChangedStaged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) typeChangedStaged = typeChanged' [Param "--cached"] {- Returns a list of the files in the specified locations whose type has - changed. Files only staged for commit will not be included. -} typeChanged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) typeChanged = typeChanged' [] typeChanged' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) typeChanged' ps l repo = guardSafeForLsFiles repo $ do (fs, cleanup) <- pipeNullSplit' (prefix ++ ps ++ suffix) repo -- git diff returns filenames relative to the top of the git repo; -- convert to filenames relative to the cwd, like git ls-files. top <- absPath (repoPath repo) currdir <- R.getCurrentDirectory return (map (\f -> relPathDirToFileAbs currdir $ top P. f) fs, cleanup) where prefix = [ Param "diff" , Param "--name-only" , Param "--diff-filter=T" , Param "-z" ] suffix = Param "--" : (if null l then [File "."] else map (File . fromRawFilePath) l) {- A item in conflict has two possible values. - Either can be Nothing, when that side deleted the file. -} data Conflicting v = Conflicting { valUs :: Maybe v , valThem :: Maybe v } deriving (Show) data Unmerged = Unmerged { unmergedFile :: RawFilePath , unmergedTreeItemType :: Conflicting TreeItemType , unmergedSha :: Conflicting Sha , unmergedSiblingFile :: Maybe RawFilePath -- ^ Normally this is Nothing, because a -- merge conflict is represented as a single file with two -- stages. However, git resolvers sometimes choose to stage -- two files, one for each side of the merge conflict. In such a case, -- this is used for the name of the second file, which is related -- to the first file. (Eg, "foo" and "foo~ref") } deriving (Show) {- Returns a list of the files in the specified locations that have - unresolved merge conflicts. - - ls-files outputs multiple lines per conflicting file, each with its own - stage number: - 1 = old version, can be ignored - 2 = us - 3 = them - If line 2 or 3 is omitted, that side removed the file. -} unmerged :: [RawFilePath] -> Repo -> IO ([Unmerged], IO Bool) unmerged l repo = guardSafeForLsFiles repo $ do (fs, cleanup) <- pipeNullSplit params repo return (reduceUnmerged [] $ catMaybes $ map (parseUnmerged . decodeBL) fs, cleanup) where params = Param "ls-files" : Param "--unmerged" : Param "-z" : Param "--" : map (File . fromRawFilePath) l data InternalUnmerged = InternalUnmerged { isus :: Bool , ifile :: RawFilePath , itreeitemtype :: Maybe TreeItemType , isha :: Maybe Sha } deriving (Show) parseUnmerged :: String -> Maybe InternalUnmerged parseUnmerged s | null file = Nothing | otherwise = case words metadata of (rawtreeitemtype:rawsha:rawstage:_) -> do stage <- readish rawstage :: Maybe Int if stage /= 2 && stage /= 3 then Nothing else do treeitemtype <- readTreeItemType (encodeBS rawtreeitemtype) sha <- extractSha (encodeBS rawsha) return $ InternalUnmerged (stage == 2) (toRawFilePath file) (Just treeitemtype) (Just sha) _ -> Nothing where (metadata, file) = separate (== '\t') s reduceUnmerged :: [Unmerged] -> [InternalUnmerged] -> [Unmerged] reduceUnmerged c [] = c reduceUnmerged c (i:is) = reduceUnmerged (new:c) rest where (rest, sibi) = findsib i is (treeitemtypeA, treeitemtypeB, shaA, shaB) | isus i = (itreeitemtype i, itreeitemtype sibi, isha i, isha sibi) | otherwise = (itreeitemtype sibi, itreeitemtype i, isha sibi, isha i) new = Unmerged { unmergedFile = ifile i , unmergedTreeItemType = Conflicting treeitemtypeA treeitemtypeB , unmergedSha = Conflicting shaA shaB , unmergedSiblingFile = if ifile sibi == ifile i then Nothing else Just (ifile sibi) } findsib templatei [] = ([], removed templatei) findsib templatei (l:ls) | ifile l == ifile templatei || issibfile templatei l = (ls, l) | otherwise = (l:ls, removed templatei) removed templatei = templatei { isus = not (isus templatei) , itreeitemtype = Nothing , isha = Nothing } -- foo~ are unmerged sibling files of foo -- Some versions or resolvers of git stage the sibling files, -- other versions or resolvers do not. issibfile x y = (ifile x <> "~") `S.isPrefixOf` ifile y && isus x || isus y && not (isus x && isus y) {- Gets the InodeCache equivalent information stored in the git index. - - Note that this uses a --debug option whose output could change at some - point in the future. If the output is not as expected, will use Nothing. -} inodeCaches :: [RawFilePath] -> Repo -> IO ([(FilePath, Maybe InodeCache)], IO Bool) inodeCaches locs repo = guardSafeForLsFiles repo $ do (ls, cleanup) <- pipeNullSplit params repo return (parse Nothing (map decodeBL ls), cleanup) where params = Param "ls-files" : Param "--cached" : Param "-z" : Param "--debug" : Param "--" : map (File . fromRawFilePath) locs parse Nothing (f:ls) = parse (Just f) ls parse (Just f) (s:[]) = let i = parsedebug s in (f, i) : [] parse (Just f) (s:ls) = let (d, f') = splitdebug s i = parsedebug d in (f, i) : parse (Just f') ls parse _ _ = [] -- First 5 lines are --debug output, remainder is the next filename. -- This assumes that --debug does not start outputting more lines. splitdebug s = case splitc '\n' s of (d1:d2:d3:d4:d5:rest) -> ( intercalate "\n" [d1, d2, d3, d4, d5] , intercalate "\n" rest ) _ -> ("", s) -- This parser allows for some changes to the --debug output, -- including reordering, or adding more items. parsedebug s = do let l = words s let iskey v = ":" `isSuffixOf` v let m = M.fromList $ zip (filter iskey l) (filter (not . iskey) l) mkInodeCache <$> (readish =<< M.lookup "ino:" m) <*> (readish =<< M.lookup "size:" m) <*> (parsePOSIXTime =<< (replace ":" "." <$> M.lookup "mtime:" m))