summaryrefslogtreecommitdiff
path: root/Git/LsFiles.hs
blob: 4eea39541a3ec824149fe81ca7c101673fbc827d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
{- git ls-files interface
 -
 - Copyright 2010-2020 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE OverloadedStrings #-}

module Git.LsFiles (
	Options(..),
	inRepo,
	inRepoDetails,
	inRepoOrBranch,
	notInRepo,
	notInRepoIncludingEmptyDirectories,
	allFiles,
	deleted,
	modified,
	staged,
	stagedNotDeleted,
	usualStageNum,
	mergeConflictHeadStageNum,
	stagedDetails,
	typeChanged,
	typeChangedStaged,
	Conflicting(..),
	Unmerged(..),
	unmerged,
	StagedDetails,
	inodeCaches,
) where

import Common
import Git
import Git.Command
import Git.Types
import Git.Sha
import Utility.InodeCache
import Utility.TimeStamp
import Utility.Attoparsec
import qualified Utility.RawFilePath as R

import System.Posix.Types
import qualified Data.Map as M
import qualified Data.ByteString as S
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import qualified System.FilePath.ByteString as P

{- It's only safe to use git ls-files on the current repo, not on a remote.
 -
 - Git has some strange behavior when git ls-files is used with repos
 - that are not the one that the cwd is in:
 - git --git-dir=../foo/.git --worktree=../foo ../foo fails saying 
 - "../foo is outside repository".
 - That does not happen when an absolute path is provided.
 -
 - Also, the files output by ls-files are relative to the cwd. 
 - Unless it's run on remote. Then it's relative to the top of the remote
 - repo.
 -
 - So, best to avoid that class of problems.
 -}
safeForLsFiles :: Repo -> Bool
safeForLsFiles r = isNothing (remoteName r)

guardSafeForLsFiles :: Repo -> IO a -> IO a
guardSafeForLsFiles r a
	| safeForLsFiles r = a
	| otherwise = giveup $ "git ls-files is unsafe to run on repository " ++ repoDescribe r

data Options = ErrorUnmatch

opParam :: Options -> CommandParam
opParam ErrorUnmatch = Param "--error-unmatch"

{- Lists files that are checked into git's index at the specified paths.
 - With no paths, all files are listed.
 -}
inRepo :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
inRepo = inRepo' [Param "--cached"] 

inRepo' :: [CommandParam] -> [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
inRepo' ps os l repo = guardSafeForLsFiles repo $ pipeNullSplit' params repo
  where
	params = 
		Param "ls-files" :
		Param "-z" :
		map opParam os ++ ps ++
		(Param "--" : map (File . fromRawFilePath) l)

{- Lists the same files inRepo does, but with sha and mode. -}
inRepoDetails :: [Options] -> [RawFilePath] -> Repo -> IO ([(RawFilePath, Sha, FileMode)], IO Bool)
inRepoDetails = stagedDetails' parser . map opParam
  where
	parser s = case parseStagedDetails s of
		Just (file, sha, mode, stagenum)
			| stagenum == usualStageNum || stagenum == mergeConflictHeadStageNum ->
				Just (file, sha, mode)
		_ -> Nothing

{- Files that are checked into the index or have been committed to a
 - branch. -}
inRepoOrBranch :: Branch -> [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
inRepoOrBranch b = inRepo'
	[ Param "--cached"
	, Param ("--with-tree=" ++ fromRef b)
	]

{- Scans for files at the specified locations that are not checked into git. -}
notInRepo :: [Options] -> Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
notInRepo = notInRepo' []

notInRepo' :: [CommandParam] -> [Options] -> Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
notInRepo' ps os include_ignored = 
	inRepo' (Param "--others" : ps ++ exclude) os
  where
	exclude
		| include_ignored = []
		| otherwise = [Param "--exclude-standard"]

{- Scans for files at the specified locations that are not checked into
 - git. Empty directories are included in the result. -}
notInRepoIncludingEmptyDirectories :: [Options] -> Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
notInRepoIncludingEmptyDirectories = notInRepo' [Param "--directory"]

{- Finds all files in the specified locations, whether checked into git or
 - not. -}
allFiles :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
allFiles = inRepo' [Param "--cached", Param "--others"]

{- Returns a list of files in the specified locations that have been
 - deleted. -}
deleted :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
deleted = inRepo' [Param "--deleted"]

{- Returns a list of files in the specified locations that have been
 - modified. -}
modified :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
modified = inRepo' [Param "--modified"]

{- Returns a list of all files that are staged for commit. -}
staged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
staged = staged' []

{- Returns a list of the files, staged for commit, that are being added,
 - moved, or changed (but not deleted), from the specified locations. -}
stagedNotDeleted :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
stagedNotDeleted = staged' [Param "--diff-filter=ACMRT"]

staged' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
staged' ps l repo = guardSafeForLsFiles repo $
	pipeNullSplit' (prefix ++ ps ++ suffix) repo
  where
	prefix = [Param "diff", Param "--cached", Param "--name-only", Param "-z"]
	suffix = Param "--" : map (File . fromRawFilePath) l

type StagedDetails = (RawFilePath, Sha, FileMode, StageNum)

type StageNum = Int

{- Used when not in a merge conflict. -}
usualStageNum :: Int
usualStageNum = 0

{- WHen in a merge conflict, git uses stage number 2 for the local HEAD
 - side of the merge conflict. -}
mergeConflictHeadStageNum :: Int
mergeConflictHeadStageNum = 2

{- Returns details about all files that are staged in the index.
 -
 - Note that, during a conflict, a file will appear in the list
 - more than once with different stage numbers.
 -}
stagedDetails :: [RawFilePath] -> Repo -> IO ([StagedDetails], IO Bool)
stagedDetails = stagedDetails' parseStagedDetails []

stagedDetails' :: (S.ByteString -> Maybe t) -> [CommandParam] -> [RawFilePath] -> Repo -> IO ([t], IO Bool)
stagedDetails' parser ps l repo = guardSafeForLsFiles repo $ do
	(ls, cleanup) <- pipeNullSplit' params repo
	return (mapMaybe parser ls, cleanup)
  where
	params = Param "ls-files" : Param "--stage" : Param "-z" : ps ++ 
		Param "--" : map (File . fromRawFilePath) l

parseStagedDetails :: S.ByteString -> Maybe StagedDetails
parseStagedDetails = eitherToMaybe . A.parseOnly parser
  where
	parser = do
		mode <- octal
		void $ A8.char ' '
		sha <- maybe (fail "bad sha") return . extractSha =<< nextword
		void $ A8.char ' '
		stagenum <- A8.decimal
		void $ A8.char '\t'
		file <- A.takeByteString
		return (file, sha, mode, stagenum)
	
	nextword = A8.takeTill (== ' ')

{- Returns a list of the files in the specified locations that are staged
 - for commit, and whose type has changed. -}
typeChangedStaged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
typeChangedStaged = typeChanged' [Param "--cached"]

{- Returns a list of the files in the specified locations whose type has
 - changed.  Files only staged for commit will not be included. -}
typeChanged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
typeChanged = typeChanged' []

typeChanged' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
typeChanged' ps l repo = guardSafeForLsFiles repo $ do
	(fs, cleanup) <- pipeNullSplit' (prefix ++ ps ++ suffix) repo
	-- git diff returns filenames relative to the top of the git repo;
	-- convert to filenames relative to the cwd, like git ls-files.
	top <- absPath (repoPath repo)
	currdir <- R.getCurrentDirectory
	return (map (\f -> relPathDirToFileAbs currdir $ top P.</> f) fs, cleanup)
  where
	prefix = 
		[ Param "diff"
		, Param "--name-only"
		, Param "--diff-filter=T"
		, Param "-z"
		]
	suffix = Param "--" : (if null l then [File "."] else map (File . fromRawFilePath) l)

{- A item in conflict has two possible values.
 - Either can be Nothing, when that side deleted the file. -}
data Conflicting v = Conflicting
	{ valUs :: Maybe v
	, valThem :: Maybe v
	} deriving (Show)

data Unmerged = Unmerged
	{ unmergedFile :: RawFilePath
	, unmergedTreeItemType :: Conflicting TreeItemType
	, unmergedSha :: Conflicting Sha
	, unmergedSiblingFile :: Maybe RawFilePath
	-- ^ Normally this is Nothing, because a
	-- merge conflict is represented as a single file with two
	-- stages. However, git resolvers sometimes choose to stage
	-- two files, one for each side of the merge conflict. In such a case,
	-- this is used for the name of the second file, which is related
	-- to the first file. (Eg, "foo" and "foo~ref")
	} deriving (Show)

{- Returns a list of the files in the specified locations that have
 - unresolved merge conflicts.
 -
 - ls-files outputs multiple lines per conflicting file, each with its own
 - stage number:
 -   1 = old version, can be ignored
 -   2 = us
 -   3 = them
 - If line 2 or 3 is omitted, that side removed the file.
 -}
unmerged :: [RawFilePath] -> Repo -> IO ([Unmerged], IO Bool)
unmerged l repo = guardSafeForLsFiles repo $ do
	(fs, cleanup) <- pipeNullSplit params repo
	return (reduceUnmerged [] $ catMaybes $ map (parseUnmerged . decodeBL) fs, cleanup)
  where
	params = 
		Param "ls-files" :
		Param "--unmerged" :
		Param "-z" :
		Param "--" :
		map (File . fromRawFilePath) l

data InternalUnmerged = InternalUnmerged
	{ isus :: Bool
	, ifile :: RawFilePath
	, itreeitemtype :: Maybe TreeItemType
	, isha :: Maybe Sha
	} deriving (Show)

parseUnmerged :: String -> Maybe InternalUnmerged
parseUnmerged s
	| null file = Nothing
	| otherwise = case words metadata of
		(rawtreeitemtype:rawsha:rawstage:_) -> do
			stage <- readish rawstage :: Maybe Int
			if stage /= 2 && stage /= 3
				then Nothing
				else do
					treeitemtype <- readTreeItemType (encodeBS rawtreeitemtype)
					sha <- extractSha (encodeBS rawsha)
					return $ InternalUnmerged (stage == 2) (toRawFilePath file)
						(Just treeitemtype) (Just sha)
		_ -> Nothing
  where
	(metadata, file) = separate (== '\t') s

reduceUnmerged :: [Unmerged] -> [InternalUnmerged] -> [Unmerged]
reduceUnmerged c [] = c
reduceUnmerged c (i:is) = reduceUnmerged (new:c) rest
  where
	(rest, sibi) = findsib i is
	(treeitemtypeA, treeitemtypeB, shaA, shaB)
		| isus i    = (itreeitemtype i, itreeitemtype sibi, isha i, isha sibi)
		| otherwise = (itreeitemtype sibi, itreeitemtype i, isha sibi, isha i)
	new = Unmerged
		{ unmergedFile = ifile i
		, unmergedTreeItemType = Conflicting treeitemtypeA treeitemtypeB
		, unmergedSha = Conflicting shaA shaB
		, unmergedSiblingFile = if ifile sibi == ifile i
			then Nothing
			else Just (ifile sibi)
		}
	findsib templatei [] = ([], removed templatei)
	findsib templatei (l:ls)
		| ifile l == ifile templatei || issibfile templatei l = (ls, l)
		| otherwise = (l:ls, removed templatei)
	removed templatei = templatei
		{ isus = not (isus templatei)
		, itreeitemtype = Nothing
		, isha = Nothing
		}
	-- foo~<ref> are unmerged sibling files of foo
	-- Some versions or resolvers of git stage the sibling files,
	-- other versions or resolvers do not.
	issibfile x y = (ifile x <> "~") `S.isPrefixOf` ifile y
		&& isus x || isus y
		&& not (isus x && isus y)

{- Gets the InodeCache equivalent information stored in the git index.
 -
 - Note that this uses a --debug option whose output could change at some
 - point in the future. If the output is not as expected, will use Nothing.
 -}
inodeCaches :: [RawFilePath] -> Repo -> IO ([(FilePath, Maybe InodeCache)], IO Bool)
inodeCaches locs repo = guardSafeForLsFiles repo $ do
	(ls, cleanup) <- pipeNullSplit params repo
	return (parse Nothing (map decodeBL ls), cleanup)
  where
	params = 
		Param "ls-files" :
		Param "--cached" :
		Param "-z" :
		Param "--debug" :
		Param "--" :
		map (File . fromRawFilePath) locs
	
	parse Nothing (f:ls) = parse (Just f) ls
	parse (Just f) (s:[]) = 
		let i = parsedebug s
		in (f, i) : []
	parse (Just f) (s:ls) =
		let (d, f') = splitdebug s
		    i = parsedebug d
		in (f, i) : parse (Just f') ls
	parse _ _ = []

	-- First 5 lines are --debug output, remainder is the next filename.
	-- This assumes that --debug does not start outputting more lines.
	splitdebug s = case splitc '\n' s of
		(d1:d2:d3:d4:d5:rest) ->
			( intercalate "\n" [d1, d2, d3, d4, d5]
			, intercalate "\n" rest
			)
		_ -> ("", s)
	
	-- This parser allows for some changes to the --debug output,
	-- including reordering, or adding more items.
	parsedebug s = do
		let l = words s
		let iskey v = ":" `isSuffixOf` v
		let m = M.fromList $ zip
			(filter iskey l)
			(filter (not . iskey) l)
		mkInodeCache
			<$> (readish =<< M.lookup "ino:" m)
			<*> (readish =<< M.lookup "size:" m)
			<*> (parsePOSIXTime =<< (replace ":" "." <$> M.lookup "mtime:" m))