summaryrefslogtreecommitdiff
path: root/Utility/Path.hs
diff options
context:
space:
mode:
Diffstat (limited to 'Utility/Path.hs')
-rw-r--r--Utility/Path.hs326
1 files changed, 151 insertions, 175 deletions
diff --git a/Utility/Path.hs b/Utility/Path.hs
index ecc752c..64ef076 100644
--- a/Utility/Path.hs
+++ b/Utility/Path.hs
@@ -1,63 +1,63 @@
{- path manipulation
-
- - Copyright 2010-2014 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2020 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fno-warn-tabs #-}
module Utility.Path (
simplifyPath,
- absPathFrom,
parentDir,
upFrom,
dirContains,
- absPath,
- relPathCwdToFile,
- relPathDirToFile,
- relPathDirToFileAbs,
segmentPaths,
+ segmentPaths',
runSegmentPaths,
- relHome,
- inPath,
- searchPath,
+ runSegmentPaths',
dotfile,
- sanitizeFilePath,
splitShortExtensions,
-
- prop_upFrom_basics,
- prop_relPathDirToFile_basics,
- prop_relPathDirToFile_regressionTest,
+ splitShortExtensions',
+ relPathDirToFileAbs,
+ inSearchPath,
+ searchPath,
+ searchPathContents,
) where
-import System.FilePath
+import System.FilePath.ByteString
+import qualified System.FilePath as P
+import qualified Data.ByteString as B
import Data.List
import Data.Maybe
-import Data.Char
+import Control.Monad
import Control.Applicative
import Prelude
import Utility.Monad
-import Utility.UserInfo
-import Utility.Directory
-import Utility.Split
+import Utility.SystemDirectory
+import Utility.Exception
+
+#ifdef mingw32_HOST_OS
+import Data.Char
import Utility.FileSystemEncoding
+#endif
{- Simplifies a path, removing any "." component, collapsing "dir/..",
- and removing the trailing path separator.
-
- On Windows, preserves whichever style of path separator might be used in
- - the input FilePaths. This is done because some programs in Windows
+ - the input RawFilePaths. This is done because some programs in Windows
- demand a particular path separator -- and which one actually varies!
-
- This does not guarantee that two paths that refer to the same location,
- and are both relative to the same location (or both absolute) will
- - yeild the same result. Run both through normalise from System.FilePath
+ - yield the same result. Run both through normalise from System.RawFilePath
- to ensure that.
-}
-simplifyPath :: FilePath -> FilePath
+simplifyPath :: RawFilePath -> RawFilePath
simplifyPath path = dropTrailingPathSeparator $
joinDrive drive $ joinPath $ norm [] $ splitPath path'
where
@@ -72,88 +72,143 @@ simplifyPath path = dropTrailingPathSeparator $
where
p' = dropTrailingPathSeparator p
-{- Makes a path absolute.
- -
- - The first parameter is a base directory (ie, the cwd) to use if the path
- - is not already absolute, and should itsef be absolute.
- -
- - Does not attempt to deal with edge cases or ensure security with
- - untrusted inputs.
- -}
-absPathFrom :: FilePath -> FilePath -> FilePath
-absPathFrom dir path = simplifyPath (combine dir path)
-
{- takeDirectory "foo/bar/" is "foo/bar". This instead yields "foo" -}
-parentDir :: FilePath -> FilePath
+parentDir :: RawFilePath -> RawFilePath
parentDir = takeDirectory . dropTrailingPathSeparator
{- Just the parent directory of a path, or Nothing if the path has no
-- parent (ie for "/" or ".") -}
-upFrom :: FilePath -> Maybe FilePath
+- parent (ie for "/" or "." or "foo") -}
+upFrom :: RawFilePath -> Maybe RawFilePath
upFrom dir
| length dirs < 2 = Nothing
- | otherwise = Just $ joinDrive drive $ intercalate s $ init dirs
+ | otherwise = Just $ joinDrive drive $
+ B.intercalate (B.singleton pathSeparator) $ init dirs
where
-- on Unix, the drive will be "/" when the dir is absolute,
-- otherwise ""
(drive, path) = splitDrive dir
- s = [pathSeparator]
- dirs = filter (not . null) $ split s path
-
-prop_upFrom_basics :: FilePath -> Bool
-prop_upFrom_basics dir
- | null dir = True
- | dir == "/" = p == Nothing
- | otherwise = p /= Just dir
- where
- p = upFrom dir
+ dirs = filter (not . B.null) $ B.splitWith isPathSeparator path
-{- Checks if the first FilePath is, or could be said to contain the second.
+{- Checks if the first RawFilePath is, or could be said to contain the second.
- For example, "foo/" contains "foo/bar". Also, "foo", "./foo", "foo/" etc
- - are all equivilant.
+ - are all equivalent.
-}
-dirContains :: FilePath -> FilePath -> Bool
+dirContains :: RawFilePath -> RawFilePath -> Bool
dirContains a b = a == b
|| a' == b'
- || (addTrailingPathSeparator a') `isPrefixOf` b'
- || a' == "." && normalise ("." </> b') == b'
+ || (a'' `B.isPrefixOf` b' && avoiddotdotb)
+ || a' == "." && normalise ("." </> b') == b' && nodotdot b'
+ || dotdotcontains
where
a' = norm a
+ a'' = addTrailingPathSeparator a'
b' = norm b
norm = normalise . simplifyPath
-{- Converts a filename into an absolute path.
- -
- - Unlike Directory.canonicalizePath, this does not require the path
- - already exists. -}
-absPath :: FilePath -> IO FilePath
-absPath file = do
- cwd <- getCurrentDirectory
- return $ absPathFrom cwd file
+ {- This handles the case where a is ".." and b is "../..",
+ - which is not inside a. Similarly, "../.." does not contain
+ - "../../../". Due to the use of norm, cases like
+ - "../../foo/../../" get converted to eg "../../.." and
+ - so do not need to be handled specially here.
+ -
+ - When this is called, we already know that
+ - a'' is a prefix of b', so all that needs to be done is drop
+ - that prefix, and check if the next path component is ".."
+ -}
+ avoiddotdotb = nodotdot $ B.drop (B.length a'') b'
-{- Constructs a relative path from the CWD to a file.
+ nodotdot p = all (not . isdotdot) (splitPath p)
+
+ isdotdot s = dropTrailingPathSeparator s == ".."
+
+ {- This handles the case where a is ".." or "../.." etc,
+ - and b is "foo" or "../foo" etc. The rule is that when
+ - a is entirely ".." components, b is under it when it starts
+ - with fewer ".." components.
+ -
+ - Due to the use of norm, cases like "../../foo/../../" get
+ - converted to eg "../../../" and so do not need to be handled
+ - specially here.
+ -}
+ dotdotcontains
+ | isAbsolute b' = False
+ | otherwise =
+ let aps = splitPath a'
+ bps = splitPath b'
+ in if all isdotdot aps
+ then length (takeWhile isdotdot bps) < length aps
+ else False
+
+{- Given an original list of paths, and an expanded list derived from it,
+ - which may be arbitrarily reordered, generates a list of lists, where
+ - each sublist corresponds to one of the original paths.
+ -
+ - When the original path is a directory, any items in the expanded list
+ - that are contained in that directory will appear in its segment.
-
- - For example, assuming CWD is /tmp/foo/bar:
- - relPathCwdToFile "/tmp/foo" == ".."
- - relPathCwdToFile "/tmp/foo/bar" == ""
+ - The order of the original list of paths is attempted to be preserved in
+ - the order of the returned segments. However, doing so has a O^NM
+ - growth factor. So, if the original list has more than 100 paths on it,
+ - we stop preserving ordering at that point. Presumably a user passing
+ - that many paths in doesn't care too much about order of the later ones.
-}
-relPathCwdToFile :: FilePath -> IO FilePath
-relPathCwdToFile f = do
- c <- getCurrentDirectory
- relPathDirToFile c f
+segmentPaths :: (a -> RawFilePath) -> [RawFilePath] -> [a] -> [[a]]
+segmentPaths = segmentPaths' (\_ r -> r)
-{- Constructs a relative path from a directory to a file. -}
-relPathDirToFile :: FilePath -> FilePath -> IO FilePath
-relPathDirToFile from to = relPathDirToFileAbs <$> absPath from <*> absPath to
+segmentPaths' :: (Maybe RawFilePath -> a -> r) -> (a -> RawFilePath) -> [RawFilePath] -> [a] -> [[r]]
+segmentPaths' f _ [] new = [map (f Nothing) new]
+segmentPaths' f _ [i] new = [map (f (Just i)) new] -- optimisation
+segmentPaths' f c (i:is) new =
+ map (f (Just i)) found : segmentPaths' f c is rest
+ where
+ (found, rest) = if length is < 100
+ then partition ini new
+ else break (not . ini) new
+ ini p = i `dirContains` c p
-{- This requires the first path to be absolute, and the
- - second path cannot contain ../ or ./
+{- This assumes that it's cheaper to call segmentPaths on the result,
+ - than it would be to run the action separately with each path. In
+ - the case of git file list commands, that assumption tends to hold.
+ -}
+runSegmentPaths :: (a -> RawFilePath) -> ([RawFilePath] -> IO [a]) -> [RawFilePath] -> IO [[a]]
+runSegmentPaths c a paths = segmentPaths c paths <$> a paths
+
+runSegmentPaths' :: (Maybe RawFilePath -> a -> r) -> (a -> RawFilePath) -> ([RawFilePath] -> IO [a]) -> [RawFilePath] -> IO [[r]]
+runSegmentPaths' si c a paths = segmentPaths' si c paths <$> a paths
+
+{- Checks if a filename is a unix dotfile. All files inside dotdirs
+ - count as dotfiles. -}
+dotfile :: RawFilePath -> Bool
+dotfile file
+ | f == "." = False
+ | f == ".." = False
+ | f == "" = False
+ | otherwise = "." `B.isPrefixOf` f || dotfile (takeDirectory file)
+ where
+ f = takeFileName file
+
+{- Similar to splitExtensions, but knows that some things in RawFilePaths
+ - after a dot are too long to be extensions. -}
+splitShortExtensions :: RawFilePath -> (RawFilePath, [B.ByteString])
+splitShortExtensions = splitShortExtensions' 5 -- enough for ".jpeg"
+splitShortExtensions' :: Int -> RawFilePath -> (RawFilePath, [B.ByteString])
+splitShortExtensions' maxextension = go []
+ where
+ go c f
+ | len > 0 && len <= maxextension && not (B.null base) =
+ go (ext:c) base
+ | otherwise = (f, c)
+ where
+ (base, ext) = splitExtension f
+ len = B.length ext
+
+{- This requires both paths to be absolute and normalized.
-
- On Windows, if the paths are on different drives,
- a relative path is not possible and the path is simply
- returned as-is.
-}
-relPathDirToFileAbs :: FilePath -> FilePath -> FilePath
+relPathDirToFileAbs :: RawFilePath -> RawFilePath -> RawFilePath
relPathDirToFileAbs from to
#ifdef mingw32_HOST_OS
| normdrive from /= normdrive to = to
@@ -169,72 +224,21 @@ relPathDirToFileAbs from to
dotdots = replicate (length pfrom - numcommon) ".."
numcommon = length common
#ifdef mingw32_HOST_OS
- normdrive = map toLower . takeWhile (/= ':') . takeDrive
+ normdrive = map toLower
+ -- Get just the drive letter, removing any leading
+ -- path separator, which takeDrive leaves on the drive
+ -- letter.
+ . dropWhileEnd (isPathSeparator . fromIntegral . ord)
+ . fromRawFilePath
+ . takeDrive
#endif
-prop_relPathDirToFile_basics :: FilePath -> FilePath -> Bool
-prop_relPathDirToFile_basics from to
- | null from || null to = True
- | from == to = null r
- | otherwise = not (null r)
- where
- r = relPathDirToFileAbs from to
-
-prop_relPathDirToFile_regressionTest :: Bool
-prop_relPathDirToFile_regressionTest = same_dir_shortcurcuits_at_difference
- where
- {- Two paths have the same directory component at the same
- - location, but it's not really the same directory.
- - Code used to get this wrong. -}
- same_dir_shortcurcuits_at_difference =
- relPathDirToFileAbs (joinPath [pathSeparator : "tmp", "r", "lll", "xxx", "yyy", "18"])
- (joinPath [pathSeparator : "tmp", "r", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"])
- == joinPath ["..", "..", "..", "..", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"]
-
-{- Given an original list of paths, and an expanded list derived from it,
- - which may be arbitrarily reordered, generates a list of lists, where
- - each sublist corresponds to one of the original paths.
- -
- - When the original path is a directory, any items in the expanded list
- - that are contained in that directory will appear in its segment.
- -
- - The order of the original list of paths is attempted to be preserved in
- - the order of the returned segments. However, doing so has a O^NM
- - growth factor. So, if the original list has more than 100 paths on it,
- - we stop preserving ordering at that point. Presumably a user passing
- - that many paths in doesn't care too much about order of the later ones.
- -}
-segmentPaths :: [RawFilePath] -> [RawFilePath] -> [[RawFilePath]]
-segmentPaths [] new = [new]
-segmentPaths [_] new = [new] -- optimisation
-segmentPaths (l:ls) new = found : segmentPaths ls rest
- where
- (found, rest) = if length ls < 100
- then partition inl new
- else break (not . inl) new
- inl f = fromRawFilePath l `dirContains` fromRawFilePath f
-
-{- This assumes that it's cheaper to call segmentPaths on the result,
- - than it would be to run the action separately with each path. In
- - the case of git file list commands, that assumption tends to hold.
- -}
-runSegmentPaths :: ([RawFilePath] -> IO [RawFilePath]) -> [RawFilePath] -> IO [[RawFilePath]]
-runSegmentPaths a paths = segmentPaths paths <$> a paths
-
-{- Converts paths in the home directory to use ~/ -}
-relHome :: FilePath -> IO String
-relHome path = do
- home <- myHomeDir
- return $ if dirContains home path
- then "~/" ++ relPathDirToFileAbs home path
- else path
-
{- Checks if a command is available in PATH.
-
- The command may be fully-qualified, in which case, this succeeds as
- long as it exists. -}
-inPath :: String -> IO Bool
-inPath command = isJust <$> searchPath command
+inSearchPath :: String -> IO Bool
+inSearchPath command = isJust <$> searchPath command
{- Finds a command in PATH and returns the full path to it.
-
@@ -245,10 +249,10 @@ inPath command = isJust <$> searchPath command
-}
searchPath :: String -> IO (Maybe FilePath)
searchPath command
- | isAbsolute command = check command
- | otherwise = getSearchPath >>= getM indir
+ | P.isAbsolute command = check command
+ | otherwise = P.getSearchPath >>= getM indir
where
- indir d = check $ d </> command
+ indir d = check $ d P.</> command
check f = firstM doesFileExist
#ifdef mingw32_HOST_OS
[f, f ++ ".exe"]
@@ -256,44 +260,16 @@ searchPath command
[f]
#endif
-{- Checks if a filename is a unix dotfile. All files inside dotdirs
- - count as dotfiles. -}
-dotfile :: FilePath -> Bool
-dotfile file
- | f == "." = False
- | f == ".." = False
- | f == "" = False
- | otherwise = "." `isPrefixOf` f || dotfile (takeDirectory file)
- where
- f = takeFileName file
-
-{- Given a string that we'd like to use as the basis for FilePath, but that
- - was provided by a third party and is not to be trusted, returns the closest
- - sane FilePath.
+{- Finds commands in PATH that match a predicate. Note that the predicate
+ - matches on the basename of the command, but the full path to it is
+ - returned.
-
- - All spaces and punctuation and other wacky stuff are replaced
- - with '_', except for '.'
- - "../" will thus turn into ".._", which is safe.
+ - Note that this will find commands in PATH that are not executable.
-}
-sanitizeFilePath :: String -> FilePath
-sanitizeFilePath = map sanitize
- where
- sanitize c
- | c == '.' = c
- | isSpace c || isPunctuation c || isSymbol c || isControl c || c == '/' = '_'
- | otherwise = c
-
-{- Similar to splitExtensions, but knows that some things in FilePaths
- - after a dot are too long to be extensions. -}
-splitShortExtensions :: FilePath -> (FilePath, [String])
-splitShortExtensions = splitShortExtensions' 5 -- enough for ".jpeg"
-splitShortExtensions' :: Int -> FilePath -> (FilePath, [String])
-splitShortExtensions' maxextension = go []
+searchPathContents :: (FilePath -> Bool) -> IO [FilePath]
+searchPathContents p =
+ filterM doesFileExist
+ =<< (concat <$> (P.getSearchPath >>= mapM go))
where
- go c f
- | len > 0 && len <= maxextension && not (null base) =
- go (ext:c) base
- | otherwise = (f, c)
- where
- (base, ext) = splitExtension f
- len = length ext
+ go d = map (d P.</>) . filter p
+ <$> catchDefaultIO [] (getDirectoryContents d)