diff options
Diffstat (limited to 'Utility/Path.hs')
-rw-r--r-- | Utility/Path.hs | 326 |
1 files changed, 151 insertions, 175 deletions
diff --git a/Utility/Path.hs b/Utility/Path.hs index ecc752c..64ef076 100644 --- a/Utility/Path.hs +++ b/Utility/Path.hs @@ -1,63 +1,63 @@ {- path manipulation - - - Copyright 2010-2014 Joey Hess <id@joeyh.name> + - Copyright 2010-2020 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE CPP #-} {-# OPTIONS_GHC -fno-warn-tabs #-} module Utility.Path ( simplifyPath, - absPathFrom, parentDir, upFrom, dirContains, - absPath, - relPathCwdToFile, - relPathDirToFile, - relPathDirToFileAbs, segmentPaths, + segmentPaths', runSegmentPaths, - relHome, - inPath, - searchPath, + runSegmentPaths', dotfile, - sanitizeFilePath, splitShortExtensions, - - prop_upFrom_basics, - prop_relPathDirToFile_basics, - prop_relPathDirToFile_regressionTest, + splitShortExtensions', + relPathDirToFileAbs, + inSearchPath, + searchPath, + searchPathContents, ) where -import System.FilePath +import System.FilePath.ByteString +import qualified System.FilePath as P +import qualified Data.ByteString as B import Data.List import Data.Maybe -import Data.Char +import Control.Monad import Control.Applicative import Prelude import Utility.Monad -import Utility.UserInfo -import Utility.Directory -import Utility.Split +import Utility.SystemDirectory +import Utility.Exception + +#ifdef mingw32_HOST_OS +import Data.Char import Utility.FileSystemEncoding +#endif {- Simplifies a path, removing any "." component, collapsing "dir/..", - and removing the trailing path separator. - - On Windows, preserves whichever style of path separator might be used in - - the input FilePaths. This is done because some programs in Windows + - the input RawFilePaths. This is done because some programs in Windows - demand a particular path separator -- and which one actually varies! - - This does not guarantee that two paths that refer to the same location, - and are both relative to the same location (or both absolute) will - - yeild the same result. Run both through normalise from System.FilePath + - yield the same result. Run both through normalise from System.RawFilePath - to ensure that. -} -simplifyPath :: FilePath -> FilePath +simplifyPath :: RawFilePath -> RawFilePath simplifyPath path = dropTrailingPathSeparator $ joinDrive drive $ joinPath $ norm [] $ splitPath path' where @@ -72,88 +72,143 @@ simplifyPath path = dropTrailingPathSeparator $ where p' = dropTrailingPathSeparator p -{- Makes a path absolute. - - - - The first parameter is a base directory (ie, the cwd) to use if the path - - is not already absolute, and should itsef be absolute. - - - - Does not attempt to deal with edge cases or ensure security with - - untrusted inputs. - -} -absPathFrom :: FilePath -> FilePath -> FilePath -absPathFrom dir path = simplifyPath (combine dir path) - {- takeDirectory "foo/bar/" is "foo/bar". This instead yields "foo" -} -parentDir :: FilePath -> FilePath +parentDir :: RawFilePath -> RawFilePath parentDir = takeDirectory . dropTrailingPathSeparator {- Just the parent directory of a path, or Nothing if the path has no -- parent (ie for "/" or ".") -} -upFrom :: FilePath -> Maybe FilePath +- parent (ie for "/" or "." or "foo") -} +upFrom :: RawFilePath -> Maybe RawFilePath upFrom dir | length dirs < 2 = Nothing - | otherwise = Just $ joinDrive drive $ intercalate s $ init dirs + | otherwise = Just $ joinDrive drive $ + B.intercalate (B.singleton pathSeparator) $ init dirs where -- on Unix, the drive will be "/" when the dir is absolute, -- otherwise "" (drive, path) = splitDrive dir - s = [pathSeparator] - dirs = filter (not . null) $ split s path - -prop_upFrom_basics :: FilePath -> Bool -prop_upFrom_basics dir - | null dir = True - | dir == "/" = p == Nothing - | otherwise = p /= Just dir - where - p = upFrom dir + dirs = filter (not . B.null) $ B.splitWith isPathSeparator path -{- Checks if the first FilePath is, or could be said to contain the second. +{- Checks if the first RawFilePath is, or could be said to contain the second. - For example, "foo/" contains "foo/bar". Also, "foo", "./foo", "foo/" etc - - are all equivilant. + - are all equivalent. -} -dirContains :: FilePath -> FilePath -> Bool +dirContains :: RawFilePath -> RawFilePath -> Bool dirContains a b = a == b || a' == b' - || (addTrailingPathSeparator a') `isPrefixOf` b' - || a' == "." && normalise ("." </> b') == b' + || (a'' `B.isPrefixOf` b' && avoiddotdotb) + || a' == "." && normalise ("." </> b') == b' && nodotdot b' + || dotdotcontains where a' = norm a + a'' = addTrailingPathSeparator a' b' = norm b norm = normalise . simplifyPath -{- Converts a filename into an absolute path. - - - - Unlike Directory.canonicalizePath, this does not require the path - - already exists. -} -absPath :: FilePath -> IO FilePath -absPath file = do - cwd <- getCurrentDirectory - return $ absPathFrom cwd file + {- This handles the case where a is ".." and b is "../..", + - which is not inside a. Similarly, "../.." does not contain + - "../../../". Due to the use of norm, cases like + - "../../foo/../../" get converted to eg "../../.." and + - so do not need to be handled specially here. + - + - When this is called, we already know that + - a'' is a prefix of b', so all that needs to be done is drop + - that prefix, and check if the next path component is ".." + -} + avoiddotdotb = nodotdot $ B.drop (B.length a'') b' -{- Constructs a relative path from the CWD to a file. + nodotdot p = all (not . isdotdot) (splitPath p) + + isdotdot s = dropTrailingPathSeparator s == ".." + + {- This handles the case where a is ".." or "../.." etc, + - and b is "foo" or "../foo" etc. The rule is that when + - a is entirely ".." components, b is under it when it starts + - with fewer ".." components. + - + - Due to the use of norm, cases like "../../foo/../../" get + - converted to eg "../../../" and so do not need to be handled + - specially here. + -} + dotdotcontains + | isAbsolute b' = False + | otherwise = + let aps = splitPath a' + bps = splitPath b' + in if all isdotdot aps + then length (takeWhile isdotdot bps) < length aps + else False + +{- Given an original list of paths, and an expanded list derived from it, + - which may be arbitrarily reordered, generates a list of lists, where + - each sublist corresponds to one of the original paths. + - + - When the original path is a directory, any items in the expanded list + - that are contained in that directory will appear in its segment. - - - For example, assuming CWD is /tmp/foo/bar: - - relPathCwdToFile "/tmp/foo" == ".." - - relPathCwdToFile "/tmp/foo/bar" == "" + - The order of the original list of paths is attempted to be preserved in + - the order of the returned segments. However, doing so has a O^NM + - growth factor. So, if the original list has more than 100 paths on it, + - we stop preserving ordering at that point. Presumably a user passing + - that many paths in doesn't care too much about order of the later ones. -} -relPathCwdToFile :: FilePath -> IO FilePath -relPathCwdToFile f = do - c <- getCurrentDirectory - relPathDirToFile c f +segmentPaths :: (a -> RawFilePath) -> [RawFilePath] -> [a] -> [[a]] +segmentPaths = segmentPaths' (\_ r -> r) -{- Constructs a relative path from a directory to a file. -} -relPathDirToFile :: FilePath -> FilePath -> IO FilePath -relPathDirToFile from to = relPathDirToFileAbs <$> absPath from <*> absPath to +segmentPaths' :: (Maybe RawFilePath -> a -> r) -> (a -> RawFilePath) -> [RawFilePath] -> [a] -> [[r]] +segmentPaths' f _ [] new = [map (f Nothing) new] +segmentPaths' f _ [i] new = [map (f (Just i)) new] -- optimisation +segmentPaths' f c (i:is) new = + map (f (Just i)) found : segmentPaths' f c is rest + where + (found, rest) = if length is < 100 + then partition ini new + else break (not . ini) new + ini p = i `dirContains` c p -{- This requires the first path to be absolute, and the - - second path cannot contain ../ or ./ +{- This assumes that it's cheaper to call segmentPaths on the result, + - than it would be to run the action separately with each path. In + - the case of git file list commands, that assumption tends to hold. + -} +runSegmentPaths :: (a -> RawFilePath) -> ([RawFilePath] -> IO [a]) -> [RawFilePath] -> IO [[a]] +runSegmentPaths c a paths = segmentPaths c paths <$> a paths + +runSegmentPaths' :: (Maybe RawFilePath -> a -> r) -> (a -> RawFilePath) -> ([RawFilePath] -> IO [a]) -> [RawFilePath] -> IO [[r]] +runSegmentPaths' si c a paths = segmentPaths' si c paths <$> a paths + +{- Checks if a filename is a unix dotfile. All files inside dotdirs + - count as dotfiles. -} +dotfile :: RawFilePath -> Bool +dotfile file + | f == "." = False + | f == ".." = False + | f == "" = False + | otherwise = "." `B.isPrefixOf` f || dotfile (takeDirectory file) + where + f = takeFileName file + +{- Similar to splitExtensions, but knows that some things in RawFilePaths + - after a dot are too long to be extensions. -} +splitShortExtensions :: RawFilePath -> (RawFilePath, [B.ByteString]) +splitShortExtensions = splitShortExtensions' 5 -- enough for ".jpeg" +splitShortExtensions' :: Int -> RawFilePath -> (RawFilePath, [B.ByteString]) +splitShortExtensions' maxextension = go [] + where + go c f + | len > 0 && len <= maxextension && not (B.null base) = + go (ext:c) base + | otherwise = (f, c) + where + (base, ext) = splitExtension f + len = B.length ext + +{- This requires both paths to be absolute and normalized. - - On Windows, if the paths are on different drives, - a relative path is not possible and the path is simply - returned as-is. -} -relPathDirToFileAbs :: FilePath -> FilePath -> FilePath +relPathDirToFileAbs :: RawFilePath -> RawFilePath -> RawFilePath relPathDirToFileAbs from to #ifdef mingw32_HOST_OS | normdrive from /= normdrive to = to @@ -169,72 +224,21 @@ relPathDirToFileAbs from to dotdots = replicate (length pfrom - numcommon) ".." numcommon = length common #ifdef mingw32_HOST_OS - normdrive = map toLower . takeWhile (/= ':') . takeDrive + normdrive = map toLower + -- Get just the drive letter, removing any leading + -- path separator, which takeDrive leaves on the drive + -- letter. + . dropWhileEnd (isPathSeparator . fromIntegral . ord) + . fromRawFilePath + . takeDrive #endif -prop_relPathDirToFile_basics :: FilePath -> FilePath -> Bool -prop_relPathDirToFile_basics from to - | null from || null to = True - | from == to = null r - | otherwise = not (null r) - where - r = relPathDirToFileAbs from to - -prop_relPathDirToFile_regressionTest :: Bool -prop_relPathDirToFile_regressionTest = same_dir_shortcurcuits_at_difference - where - {- Two paths have the same directory component at the same - - location, but it's not really the same directory. - - Code used to get this wrong. -} - same_dir_shortcurcuits_at_difference = - relPathDirToFileAbs (joinPath [pathSeparator : "tmp", "r", "lll", "xxx", "yyy", "18"]) - (joinPath [pathSeparator : "tmp", "r", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"]) - == joinPath ["..", "..", "..", "..", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"] - -{- Given an original list of paths, and an expanded list derived from it, - - which may be arbitrarily reordered, generates a list of lists, where - - each sublist corresponds to one of the original paths. - - - - When the original path is a directory, any items in the expanded list - - that are contained in that directory will appear in its segment. - - - - The order of the original list of paths is attempted to be preserved in - - the order of the returned segments. However, doing so has a O^NM - - growth factor. So, if the original list has more than 100 paths on it, - - we stop preserving ordering at that point. Presumably a user passing - - that many paths in doesn't care too much about order of the later ones. - -} -segmentPaths :: [RawFilePath] -> [RawFilePath] -> [[RawFilePath]] -segmentPaths [] new = [new] -segmentPaths [_] new = [new] -- optimisation -segmentPaths (l:ls) new = found : segmentPaths ls rest - where - (found, rest) = if length ls < 100 - then partition inl new - else break (not . inl) new - inl f = fromRawFilePath l `dirContains` fromRawFilePath f - -{- This assumes that it's cheaper to call segmentPaths on the result, - - than it would be to run the action separately with each path. In - - the case of git file list commands, that assumption tends to hold. - -} -runSegmentPaths :: ([RawFilePath] -> IO [RawFilePath]) -> [RawFilePath] -> IO [[RawFilePath]] -runSegmentPaths a paths = segmentPaths paths <$> a paths - -{- Converts paths in the home directory to use ~/ -} -relHome :: FilePath -> IO String -relHome path = do - home <- myHomeDir - return $ if dirContains home path - then "~/" ++ relPathDirToFileAbs home path - else path - {- Checks if a command is available in PATH. - - The command may be fully-qualified, in which case, this succeeds as - long as it exists. -} -inPath :: String -> IO Bool -inPath command = isJust <$> searchPath command +inSearchPath :: String -> IO Bool +inSearchPath command = isJust <$> searchPath command {- Finds a command in PATH and returns the full path to it. - @@ -245,10 +249,10 @@ inPath command = isJust <$> searchPath command -} searchPath :: String -> IO (Maybe FilePath) searchPath command - | isAbsolute command = check command - | otherwise = getSearchPath >>= getM indir + | P.isAbsolute command = check command + | otherwise = P.getSearchPath >>= getM indir where - indir d = check $ d </> command + indir d = check $ d P.</> command check f = firstM doesFileExist #ifdef mingw32_HOST_OS [f, f ++ ".exe"] @@ -256,44 +260,16 @@ searchPath command [f] #endif -{- Checks if a filename is a unix dotfile. All files inside dotdirs - - count as dotfiles. -} -dotfile :: FilePath -> Bool -dotfile file - | f == "." = False - | f == ".." = False - | f == "" = False - | otherwise = "." `isPrefixOf` f || dotfile (takeDirectory file) - where - f = takeFileName file - -{- Given a string that we'd like to use as the basis for FilePath, but that - - was provided by a third party and is not to be trusted, returns the closest - - sane FilePath. +{- Finds commands in PATH that match a predicate. Note that the predicate + - matches on the basename of the command, but the full path to it is + - returned. - - - All spaces and punctuation and other wacky stuff are replaced - - with '_', except for '.' - - "../" will thus turn into ".._", which is safe. + - Note that this will find commands in PATH that are not executable. -} -sanitizeFilePath :: String -> FilePath -sanitizeFilePath = map sanitize - where - sanitize c - | c == '.' = c - | isSpace c || isPunctuation c || isSymbol c || isControl c || c == '/' = '_' - | otherwise = c - -{- Similar to splitExtensions, but knows that some things in FilePaths - - after a dot are too long to be extensions. -} -splitShortExtensions :: FilePath -> (FilePath, [String]) -splitShortExtensions = splitShortExtensions' 5 -- enough for ".jpeg" -splitShortExtensions' :: Int -> FilePath -> (FilePath, [String]) -splitShortExtensions' maxextension = go [] +searchPathContents :: (FilePath -> Bool) -> IO [FilePath] +searchPathContents p = + filterM doesFileExist + =<< (concat <$> (P.getSearchPath >>= mapM go)) where - go c f - | len > 0 && len <= maxextension && not (null base) = - go (ext:c) base - | otherwise = (f, c) - where - (base, ext) = splitExtension f - len = length ext + go d = map (d P.</>) . filter p + <$> catchDefaultIO [] (getDirectoryContents d) |