diff options
Diffstat (limited to 'Utility')
39 files changed, 2264 insertions, 876 deletions
diff --git a/Utility/Batch.hs b/Utility/Batch.hs index 1d66881..6ed7881 100644 --- a/Utility/Batch.hs +++ b/Utility/Batch.hs @@ -1,6 +1,6 @@ {- Running a long or expensive batch operation niced. - - - Copyright 2013 Joey Hess <id@joeyh.name> + - Copyright 2013-2020 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} @@ -10,6 +10,7 @@ module Utility.Batch ( batch, BatchCommandMaker, + nonBatchCommandMaker, getBatchCommandMaker, toBatchCommand, batchCommand, @@ -22,7 +23,6 @@ import Common import Control.Concurrent.Async import System.Posix.Process #endif -import qualified Control.Exception as E {- Runs an operation, at batch priority. - @@ -42,21 +42,22 @@ batch a = wait =<< batchthread batchthread = asyncBound $ do setProcessPriority 0 maxNice a + maxNice = 19 #else batch a = a #endif -maxNice :: Int -maxNice = 19 - {- Makes a command be run by whichever of nice, ionice, and nocache - are available in the path. -} type BatchCommandMaker = (String, [CommandParam]) -> (String, [CommandParam]) +nonBatchCommandMaker :: BatchCommandMaker +nonBatchCommandMaker = id + getBatchCommandMaker :: IO BatchCommandMaker getBatchCommandMaker = do #ifndef mingw32_HOST_OS - nicers <- filterM (inPath . fst) + nicers <- filterM (inSearchPath . fst) [ ("nice", []) , ("ionice", ["-c3"]) , ("nocache", []) @@ -75,11 +76,7 @@ toBatchCommand v = do return $ batchmaker v {- Runs a command in a way that's suitable for batch jobs that can be - - interrupted. - - - - If the calling thread receives an async exception, it sends the - - command a SIGTERM, and after the command finishes shuttting down, - - it re-raises the async exception. -} + - interrupted. -} batchCommand :: String -> [CommandParam] -> IO Bool batchCommand command params = batchCommandEnv command params Nothing @@ -87,13 +84,4 @@ batchCommandEnv :: String -> [CommandParam] -> Maybe [(String, String)] -> IO Bo batchCommandEnv command params environ = do batchmaker <- getBatchCommandMaker let (command', params') = batchmaker (command, params) - let p = proc command' $ toCommand params' - (_, _, _, pid) <- createProcess $ p { env = environ } - r <- E.try (waitForProcess pid) :: IO (Either E.SomeException ExitCode) - case r of - Right ExitSuccess -> return True - Right _ -> return False - Left asyncexception -> do - terminateProcess pid - void $ waitForProcess pid - E.throwIO asyncexception + boolSystemEnv command' params' environ diff --git a/Utility/CoProcess.hs b/Utility/CoProcess.hs index 2bae40f..e091d43 100644 --- a/Utility/CoProcess.hs +++ b/Utility/CoProcess.hs @@ -10,6 +10,7 @@ module Utility.CoProcess ( CoProcessHandle, + CoProcessState(..), start, stop, query, diff --git a/Utility/CopyFile.hs b/Utility/CopyFile.hs new file mode 100644 index 0000000..207153d --- /dev/null +++ b/Utility/CopyFile.hs @@ -0,0 +1,96 @@ +{- file copying + - + - Copyright 2010-2021 Joey Hess <id@joeyh.name> + - + - License: BSD-2-clause + -} + +module Utility.CopyFile ( + copyFileExternal, + copyCoW, + createLinkOrCopy, + CopyMetaData(..) +) where + +import Common +import qualified BuildInfo +import qualified Utility.RawFilePath as R + +data CopyMetaData + -- Copy timestamps when possible, but no other metadata, and + -- when copying a symlink, makes a copy of its content. + = CopyTimeStamps + -- Copy all metadata when possible. + | CopyAllMetaData + deriving (Eq) + +copyMetaDataParams :: CopyMetaData -> [CommandParam] +copyMetaDataParams meta = map snd $ filter fst + [ (allmeta && BuildInfo.cp_a, Param "-a") + , (allmeta && BuildInfo.cp_p && not BuildInfo.cp_a + , Param "-p") + , (not allmeta && BuildInfo.cp_preserve_timestamps + , Param "--preserve=timestamps") + -- cp -a may preserve xattrs that have special meaning, + -- eg to NFS, and have even been observed to prevent later + -- changing the permissions of the file. So prevent preserving + -- xattrs. + , (allmeta && BuildInfo.cp_a && BuildInfo.cp_no_preserve_xattr_supported + , Param "--no-preserve=xattr") + ] + where + allmeta = meta == CopyAllMetaData + +{- The cp command is used, because I hate reinventing the wheel, + - and because this allows easy access to features like cp --reflink + - and preserving metadata. -} +copyFileExternal :: CopyMetaData -> FilePath -> FilePath -> IO Bool +copyFileExternal meta src dest = do + -- Delete any existing dest file because an unwritable file + -- would prevent cp from working. + void $ tryIO $ removeFile dest + boolSystem "cp" $ params ++ [File src, File dest] + where + params + | BuildInfo.cp_reflink_supported = + Param "--reflink=auto" : copyMetaDataParams meta + | otherwise = copyMetaDataParams meta + +{- When a filesystem supports CoW (and cp does), uses it to make + - an efficient copy of a file. Otherwise, returns False. + - + - The dest file must not exist yet, or it will fail to make a CoW copy, + - and will return False. + -} +copyCoW :: CopyMetaData -> FilePath -> FilePath -> IO Bool +copyCoW meta src dest + | BuildInfo.cp_reflink_supported = do + -- When CoW is not supported, cp will complain to stderr, + -- so have to discard its stderr. + ok <- catchBoolIO $ withNullHandle $ \nullh -> + let p = (proc "cp" $ toCommand $ params ++ [File src, File dest]) + { std_out = UseHandle nullh + , std_err = UseHandle nullh + } + in withCreateProcess p $ \_ _ _ -> checkSuccessProcess + -- When CoW is not supported, cp creates the destination + -- file but leaves it empty. + unless ok $ + void $ tryIO $ removeFile dest + return ok + | otherwise = return False + where + -- Note that in coreutils 9.0, cp uses CoW by default, + -- without needing an option. This s only needed to support + -- older versions. + params = Param "--reflink=always" : copyMetaDataParams meta + +{- Create a hard link if the filesystem allows it, and fall back to copying + - the file. -} +createLinkOrCopy :: RawFilePath -> RawFilePath -> IO Bool +createLinkOrCopy src dest = go `catchIO` const fallback + where + go = do + R.createLink src dest + return True + fallback = copyFileExternal CopyAllMetaData (fromRawFilePath src) (fromRawFilePath dest) diff --git a/Utility/Data.hs b/Utility/Data.hs index 5510845..faf9b34 100644 --- a/Utility/Data.hs +++ b/Utility/Data.hs @@ -1,6 +1,6 @@ {- utilities for simple data types - - - Copyright 2013 Joey Hess <id@joeyh.name> + - Copyright 2013-2021 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} @@ -10,8 +10,12 @@ module Utility.Data ( firstJust, eitherToMaybe, + s2w8, + w82s, ) where +import Data.Word + {- First item in the list that is not Nothing. -} firstJust :: Eq a => [Maybe a] -> Maybe a firstJust ms = case dropWhile (== Nothing) ms of @@ -20,3 +24,15 @@ firstJust ms = case dropWhile (== Nothing) ms of eitherToMaybe :: Either a b -> Maybe b eitherToMaybe = either (const Nothing) Just + +c2w8 :: Char -> Word8 +c2w8 = fromIntegral . fromEnum + +w82c :: Word8 -> Char +w82c = toEnum . fromIntegral + +s2w8 :: String -> [Word8] +s2w8 = map c2w8 + +w82s :: [Word8] -> String +w82s = map w82c diff --git a/Utility/DataUnits.hs b/Utility/DataUnits.hs index a6c9ffc..8d910c6 100644 --- a/Utility/DataUnits.hs +++ b/Utility/DataUnits.hs @@ -1,6 +1,6 @@ {- data size display and parsing - - - Copyright 2011 Joey Hess <id@joeyh.name> + - Copyright 2011-2022 Joey Hess <id@joeyh.name> - - License: BSD-2-clause - @@ -21,14 +21,20 @@ - error. This was bad. - - So, a committee was formed. And it arrived at a committee-like decision, - - which satisfied noone, confused everyone, and made the world an uglier - - place. As with all committees, this was meh. + - which satisfied no one, confused everyone, and made the world an uglier + - place. As with all committees, this was meh. Or in this case, "mib". - - And the drive manufacturers happily continued selling drives that are - increasingly smaller than you'd expect, if you don't count on your - fingers. But that are increasingly too big for anyone to much notice. - This caused me to need git-annex. - + - Meanwhile, over in telecommunications land, they were using entirely + - different units that differ only in capitalization sometimes. + - (At one point this convinced me that it was a good idea to buy an ISDN + - line because 128 kb/s sounded really fast! But it was really only 128 + - kbit/s...) + - - Thus, I use units here that I loathe. Because if I didn't, people would - be confused that their drives seem the wrong size, and other people would - complain at me for not being standards compliant. And we call this @@ -38,7 +44,7 @@ module Utility.DataUnits ( dataUnits, storageUnits, - memoryUnits, + committeeUnits, bandwidthUnits, oldSchoolUnits, Unit(..), @@ -62,28 +68,30 @@ data Unit = Unit ByteSize Abbrev Name deriving (Ord, Show, Eq) dataUnits :: [Unit] -dataUnits = storageUnits ++ memoryUnits +dataUnits = storageUnits ++ committeeUnits ++ bandwidthUnits {- Storage units are (stupidly) powers of ten. -} storageUnits :: [Unit] storageUnits = - [ Unit (p 8) "YB" "yottabyte" + [ Unit (p 10) "QB" "quettabyte" + , Unit (p 9) "RB" "ronnabyte" + , Unit (p 8) "YB" "yottabyte" , Unit (p 7) "ZB" "zettabyte" , Unit (p 6) "EB" "exabyte" , Unit (p 5) "PB" "petabyte" , Unit (p 4) "TB" "terabyte" , Unit (p 3) "GB" "gigabyte" , Unit (p 2) "MB" "megabyte" - , Unit (p 1) "kB" "kilobyte" -- weird capitalization thanks to committe - , Unit (p 0) "B" "byte" + , Unit (p 1) "kB" "kilobyte" -- weird capitalization thanks to committee + , Unit 1 "B" "byte" ] where p :: Integer -> Integer p n = 1000^n -{- Memory units are (stupidly named) powers of 2. -} -memoryUnits :: [Unit] -memoryUnits = +{- Committee units are (stupidly named) powers of 2. -} +committeeUnits :: [Unit] +committeeUnits = [ Unit (p 8) "YiB" "yobibyte" , Unit (p 7) "ZiB" "zebibyte" , Unit (p 6) "EiB" "exbibyte" @@ -92,19 +100,37 @@ memoryUnits = , Unit (p 3) "GiB" "gibibyte" , Unit (p 2) "MiB" "mebibyte" , Unit (p 1) "KiB" "kibibyte" - , Unit (p 0) "B" "byte" + , Unit 1 "B" "byte" ] where p :: Integer -> Integer p n = 2^(n*10) -{- Bandwidth units are only measured in bits if you're some crazy telco. -} +{- Bandwidth units are (stupidly) measured in bits, not bytes, and are + - (also stupidly) powers of ten. + - + - While it's fairly common for "Mb", "Gb" etc to be used, that differs + - from "MB", "GB", etc only in case, and readSize is case-insensitive. + - So "Mbit", "Gbit" etc are used instead to avoid parsing ambiguity. + -} bandwidthUnits :: [Unit] -bandwidthUnits = error "stop trying to rip people off" +bandwidthUnits = + [ Unit (p 8) "Ybit" "yottabit" + , Unit (p 7) "Zbit" "zettabit" + , Unit (p 6) "Ebit" "exabit" + , Unit (p 5) "Pbit" "petabit" + , Unit (p 4) "Tbit" "terabit" + , Unit (p 3) "Gbit" "gigabit" + , Unit (p 2) "Mbit" "megabit" + , Unit (p 1) "kbit" "kilobit" -- weird capitalization thanks to committee + ] + where + p :: Integer -> Integer + p n = (1000^n) `div` 8 {- Do you yearn for the days when men were men and megabytes were megabytes? -} oldSchoolUnits :: [Unit] -oldSchoolUnits = zipWith (curry mingle) storageUnits memoryUnits +oldSchoolUnits = zipWith (curry mingle) storageUnits committeeUnits where mingle (Unit _ a n, Unit s' _ _) = Unit s' a n diff --git a/Utility/Debug.hs b/Utility/Debug.hs new file mode 100644 index 0000000..6e6e701 --- /dev/null +++ b/Utility/Debug.hs @@ -0,0 +1,102 @@ +{- Debug output + - + - Copyright 2021 Joey Hess <id@joeyh.name> + - + - License: BSD-2-clause + -} + +{-# LANGUAGE OverloadedStrings, FlexibleInstances, TypeSynonymInstances #-} +{-# LANGUAGE LambdaCase #-} +{-# OPTIONS_GHC -fno-warn-tabs -w #-} + +module Utility.Debug ( + DebugSource(..), + DebugSelector(..), + configureDebug, + getDebugSelector, + debug, + fastDebug +) where + +import qualified Data.ByteString as S +import Data.IORef +import Data.String +import Data.Time +import System.IO.Unsafe (unsafePerformIO) +import qualified Data.Semigroup as Sem +import Prelude + +import Utility.FileSystemEncoding + +-- | The source of a debug message. For example, this could be a module or +-- function name. +newtype DebugSource = DebugSource S.ByteString + deriving (Eq, Show) + +instance IsString DebugSource where + fromString = DebugSource . encodeBS + +-- | Selects whether to display a message from a source. +data DebugSelector + = DebugSelector (DebugSource -> Bool) + | NoDebugSelector + +instance Sem.Semigroup DebugSelector where + DebugSelector a <> DebugSelector b = DebugSelector (\v -> a v || b v) + NoDebugSelector <> NoDebugSelector = NoDebugSelector + NoDebugSelector <> b = b + a <> NoDebugSelector = a + +instance Monoid DebugSelector where + mempty = NoDebugSelector + +-- | Configures debugging. +configureDebug + :: (S.ByteString -> IO ()) + -- ^ Used to display debug output. + -> DebugSelector + -> IO () +configureDebug src p = writeIORef debugConfigGlobal (src, p) + +-- | Gets the currently configured DebugSelector. +getDebugSelector :: IO DebugSelector +getDebugSelector = snd <$> readIORef debugConfigGlobal + +-- A global variable for the debug configuration. +{-# NOINLINE debugConfigGlobal #-} +debugConfigGlobal :: IORef (S.ByteString -> IO (), DebugSelector) +debugConfigGlobal = unsafePerformIO $ newIORef (dontshow, selectnone) + where + dontshow _ = return () + selectnone = NoDebugSelector + +-- | Displays a debug message, if that has been enabled by configureDebug. +-- +-- This is reasonably fast when debugging is not enabled, but since it does +-- have to consult a IORef each time, using it in a tight loop may slow +-- down the program. +debug :: DebugSource -> String -> IO () +debug src msg = readIORef debugConfigGlobal >>= \case + (displayer, NoDebugSelector) -> + displayer =<< formatDebugMessage src msg + (displayer, DebugSelector p) + | p src -> displayer =<< formatDebugMessage src msg + | otherwise -> return () + +-- | Displays a debug message, if the DebugSelector allows. +-- +-- When the DebugSelector does not let the message be displayed, this runs +-- very quickly, allowing it to be used inside tight loops. +fastDebug :: DebugSelector -> DebugSource -> String -> IO () +fastDebug NoDebugSelector src msg = do + (displayer, _) <- readIORef debugConfigGlobal + displayer =<< formatDebugMessage src msg +fastDebug (DebugSelector p) src msg + | p src = fastDebug NoDebugSelector src msg + | otherwise = return () + +formatDebugMessage :: DebugSource -> String -> IO S.ByteString +formatDebugMessage (DebugSource src) msg = do + t <- encodeBS . formatTime defaultTimeLocale "[%F %X%Q]" + <$> getZonedTime + return (t <> " (" <> src <> ") " <> encodeBS msg) diff --git a/Utility/Directory.hs b/Utility/Directory.hs index e2c6a94..a5c023f 100644 --- a/Utility/Directory.hs +++ b/Utility/Directory.hs @@ -1,11 +1,12 @@ {- directory traversal and manipulation - - - Copyright 2011-2014 Joey Hess <id@joeyh.name> + - Copyright 2011-2020 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} {-# LANGUAGE CPP #-} +{-# LANGUAGE LambdaCase #-} {-# OPTIONS_GHC -fno-warn-tabs #-} module Utility.Directory ( @@ -13,25 +14,19 @@ module Utility.Directory ( module Utility.SystemDirectory ) where -import System.IO.Error import Control.Monad import System.FilePath -import System.PosixCompat.Files +import System.PosixCompat.Files (isDirectory, isSymbolicLink) import Control.Applicative import System.IO.Unsafe (unsafeInterleaveIO) import Data.Maybe import Prelude -#ifndef mingw32_HOST_OS -import Utility.SafeCommand -import Control.Monad.IfElse -#endif - import Utility.SystemDirectory -import Utility.Tmp import Utility.Exception import Utility.Monad -import Utility.Applicative +import Utility.FileSystemEncoding +import qualified Utility.RawFilePath as R dirCruft :: FilePath -> Bool dirCruft "." = True @@ -71,7 +66,7 @@ dirContentsRecursiveSkipping skipdir followsubdirsymlinks topdir = go [topdir] | otherwise = do let skip = collect (entry:files) dirs' entries let recurse = collect files (entry:dirs') entries - ms <- catchMaybeIO $ getSymbolicLinkStatus entry + ms <- catchMaybeIO $ R.getSymbolicLinkStatus (toRawFilePath entry) case ms of (Just s) | isDirectory s -> recurse @@ -93,64 +88,14 @@ dirTreeRecursiveSkipping skipdir topdir = go [] [topdir] | skipdir (takeFileName dir) = go c dirs | otherwise = unsafeInterleaveIO $ do subdirs <- go [] - =<< filterM (isDirectory <$$> getSymbolicLinkStatus) + =<< filterM isdir =<< catchDefaultIO [] (dirContents dir) go (subdirs++dir:c) dirs + isdir p = isDirectory <$> R.getSymbolicLinkStatus (toRawFilePath p) -{- Moves one filename to another. - - First tries a rename, but falls back to moving across devices if needed. -} -moveFile :: FilePath -> FilePath -> IO () -moveFile src dest = tryIO (rename src dest) >>= onrename - where - onrename (Right _) = noop - onrename (Left e) - | isPermissionError e = rethrow - | isDoesNotExistError e = rethrow - | otherwise = viaTmp mv dest "" - where - rethrow = throwM e - - mv tmp _ = do - -- copyFile is likely not as optimised as - -- the mv command, so we'll use the command. - -- - -- But, while Windows has a "mv", it does not seem very - -- reliable, so use copyFile there. -#ifndef mingw32_HOST_OS - -- If dest is a directory, mv would move the file - -- into it, which is not desired. - whenM (isdir dest) rethrow - ok <- boolSystem "mv" [Param "-f", Param src, Param tmp] - let e' = e -#else - r <- tryIO $ copyFile src tmp - let (ok, e') = case r of - Left err -> (False, err) - Right _ -> (True, e) -#endif - unless ok $ do - -- delete any partial - _ <- tryIO $ removeFile tmp - throwM e' - -#ifndef mingw32_HOST_OS - isdir f = do - r <- tryIO $ getFileStatus f - case r of - (Left _) -> return False - (Right s) -> return $ isDirectory s -#endif - -{- Removes a file, which may or may not exist, and does not have to - - be a regular file. +{- Use with an action that removes something, which may or may not exist. - - - Note that an exception is thrown if the file exists but - - cannot be removed. -} -nukeFile :: FilePath -> IO () -nukeFile file = void $ tryWhenExists go - where -#ifndef mingw32_HOST_OS - go = removeLink file -#else - go = removeFile file -#endif + - If an exception is thrown due to it not existing, it is ignored. + -} +removeWhenExistsWith :: (a -> IO ()) -> a -> IO () +removeWhenExistsWith f a = void $ tryWhenExists $ f a diff --git a/Utility/Directory/Create.hs b/Utility/Directory/Create.hs new file mode 100644 index 0000000..5650f96 --- /dev/null +++ b/Utility/Directory/Create.hs @@ -0,0 +1,105 @@ +{- directory creating + - + - Copyright 2011-2020 Joey Hess <id@joeyh.name> + - + - License: BSD-2-clause + -} + +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE LambdaCase #-} +{-# OPTIONS_GHC -fno-warn-tabs #-} + +module Utility.Directory.Create ( + createDirectoryUnder, + createDirectoryUnder', +) where + +import Control.Monad +import Control.Applicative +import Control.Monad.IO.Class +import Control.Monad.IfElse +import System.IO.Error +import Data.Maybe +import qualified System.FilePath.ByteString as P +import Prelude + +import Utility.SystemDirectory +import Utility.Path.AbsRel +import Utility.Exception +import Utility.FileSystemEncoding +import qualified Utility.RawFilePath as R +import Utility.PartialPrelude + +{- Like createDirectoryIfMissing True, but it will only create + - missing parent directories up to but not including a directory + - from the first parameter. + - + - For example, createDirectoryUnder ["/tmp/foo"] "/tmp/foo/bar/baz" + - will create /tmp/foo/bar if necessary, but if /tmp/foo does not exist, + - it will throw an exception. + - + - The exception thrown is the same that createDirectory throws if the + - parent directory does not exist. + - + - If the second FilePath is not under the first + - FilePath (or the same as it), it will fail with an exception + - even if the second FilePath's parent directory already exists. + - + - The FilePaths can be relative, or absolute. + - They will be normalized as necessary. + - + - Note that, the second FilePath, if relative, is relative to the current + - working directory. + -} +createDirectoryUnder :: [RawFilePath] -> RawFilePath -> IO () +createDirectoryUnder topdirs dir = + createDirectoryUnder' topdirs dir R.createDirectory + +createDirectoryUnder' + :: (MonadIO m, MonadCatch m) + => [RawFilePath] + -> RawFilePath + -> (RawFilePath -> m ()) + -> m () +createDirectoryUnder' topdirs dir0 mkdir = do + relps <- liftIO $ forM topdirs $ \topdir -> relPathDirToFile topdir dir0 + let relparts = map P.splitDirectories relps + -- Catch cases where dir0 is not beneath a topdir. + -- If the relative path between them starts with "..", + -- it's not. And on Windows, if they are on different drives, + -- the path will not be relative. + let notbeneath = \(_topdir, (relp, dirs)) -> + headMaybe dirs /= Just ".." && not (P.isAbsolute relp) + case filter notbeneath $ zip topdirs (zip relps relparts) of + ((topdir, (_relp, dirs)):_) + -- If dir0 is the same as the topdir, don't try to + -- create it, but make sure it does exist. + | null dirs -> + liftIO $ unlessM (doesDirectoryExist (fromRawFilePath topdir)) $ + ioError $ customerror doesNotExistErrorType $ + "createDirectoryFrom: " ++ fromRawFilePath topdir ++ " does not exist" + | otherwise -> createdirs $ + map (topdir P.</>) (reverse (scanl1 (P.</>) dirs)) + _ -> liftIO $ ioError $ customerror userErrorType + ("createDirectoryFrom: not located in " ++ unwords (map fromRawFilePath topdirs)) + where + customerror t s = mkIOError t s Nothing (Just (fromRawFilePath dir0)) + + createdirs [] = pure () + createdirs (dir:[]) = createdir dir (liftIO . ioError) + createdirs (dir:dirs) = createdir dir $ \_ -> do + createdirs dirs + createdir dir (liftIO . ioError) + + -- This is the same method used by createDirectoryIfMissing, + -- in particular the handling of errors that occur when the + -- directory already exists. See its source for explanation + -- of several subtleties. + createdir dir notexisthandler = tryIO (mkdir dir) >>= \case + Right () -> pure () + Left e + | isDoesNotExistError e -> notexisthandler e + | isAlreadyExistsError e || isPermissionError e -> + liftIO $ unlessM (doesDirectoryExist (fromRawFilePath dir)) $ + ioError e + | otherwise -> liftIO $ ioError e diff --git a/Utility/DottedVersion.hs b/Utility/DottedVersion.hs index dff3717..84b8463 100644 --- a/Utility/DottedVersion.hs +++ b/Utility/DottedVersion.hs @@ -13,7 +13,7 @@ module Utility.DottedVersion ( normalize, ) where -import Common +import Utility.Split data DottedVersion = DottedVersion String Integer deriving (Eq) diff --git a/Utility/Env/Set.hs b/Utility/Env/Set.hs index f14674c..45d2e7f 100644 --- a/Utility/Env/Set.hs +++ b/Utility/Env/Set.hs @@ -10,6 +10,7 @@ module Utility.Env.Set ( setEnv, unsetEnv, + legalInEnvVar, ) where #ifdef mingw32_HOST_OS @@ -18,6 +19,7 @@ import Utility.Env #else import qualified System.Posix.Env as PE #endif +import Data.Char {- Sets an environment variable. To overwrite an existing variable, - overwrite must be True. @@ -41,3 +43,7 @@ unsetEnv = PE.unsetEnv #else unsetEnv = System.SetEnv.unsetEnv #endif + +legalInEnvVar :: Char -> Bool +legalInEnvVar '_' = True +legalInEnvVar c = isAsciiLower c || isAsciiUpper c || (isNumber c && isAscii c) diff --git a/Utility/Exception.hs b/Utility/Exception.hs index bcadb78..cf55c5f 100644 --- a/Utility/Exception.hs +++ b/Utility/Exception.hs @@ -1,6 +1,6 @@ {- Simple IO exception handling (and some more) - - - Copyright 2011-2016 Joey Hess <id@joeyh.name> + - Copyright 2011-2023 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} @@ -20,6 +20,7 @@ module Utility.Exception ( bracketIO, catchNonAsync, tryNonAsync, + nonAsyncHandler, tryWhenExists, catchIOErrorType, IOErrorType(..), @@ -28,21 +29,24 @@ module Utility.Exception ( import Control.Monad.Catch as X hiding (Handler) import qualified Control.Monad.Catch as M -import Control.Exception (IOException, AsyncException) -import Control.Exception (SomeAsyncException) +import Control.Exception (IOException, AsyncException, SomeAsyncException) import Control.Monad import Control.Monad.IO.Class (liftIO, MonadIO) import System.IO.Error (isDoesNotExistError, ioeGetErrorType) import GHC.IO.Exception (IOErrorType(..)) import Utility.Data +import Utility.SafeOutput {- Like error, this throws an exception. Unlike error, if this exception - is not caught, it won't generate a backtrace. So use this for situations - - where there's a problem that the user is excpected to see in some - - circumstances. -} + - where there's a problem that the user is expected to see in some + - circumstances. + - + - Also, control characters are filtered out of the message. + -} giveup :: [Char] -> a -giveup = errorWithoutStackTrace +giveup = errorWithoutStackTrace . safeOutput {- Catches IO errors and returns a Bool -} catchBoolIO :: MonadCatch m => m Bool -> m Bool @@ -81,11 +85,7 @@ bracketIO setup cleanup = bracket (liftIO setup) (liftIO . cleanup) - ThreadKilled and UserInterrupt get through. -} catchNonAsync :: MonadCatch m => m a -> (SomeException -> m a) -> m a -catchNonAsync a onerr = a `catches` - [ M.Handler (\ (e :: AsyncException) -> throwM e) - , M.Handler (\ (e :: SomeAsyncException) -> throwM e) - , M.Handler (\ (e :: SomeException) -> onerr e) - ] +catchNonAsync a onerr = a `catches` (nonAsyncHandler onerr) tryNonAsync :: MonadCatch m => m a -> m (Either SomeException a) tryNonAsync a = go `catchNonAsync` (return . Left) @@ -94,6 +94,13 @@ tryNonAsync a = go `catchNonAsync` (return . Left) v <- a return (Right v) +nonAsyncHandler :: MonadCatch m => (SomeException -> m a) -> [M.Handler m a] +nonAsyncHandler onerr = + [ M.Handler (\ (e :: AsyncException) -> throwM e) + , M.Handler (\ (e :: SomeAsyncException) -> throwM e) + , M.Handler (\ (e :: SomeException) -> onerr e) + ] + {- Catches only DoesNotExist exceptions, and lets all others through. -} tryWhenExists :: MonadCatch m => m a -> m (Maybe a) tryWhenExists a = do diff --git a/Utility/FileMode.hs b/Utility/FileMode.hs index 7d36c55..ecc19d8 100644 --- a/Utility/FileMode.hs +++ b/Utility/FileMode.hs @@ -1,11 +1,12 @@ {- File mode utilities. - - - Copyright 2010-2017 Joey Hess <id@joeyh.name> + - Copyright 2010-2023 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} {-# LANGUAGE CPP #-} +{-# OPTIONS_GHC -fno-warn-tabs #-} module Utility.FileMode ( module Utility.FileMode, @@ -15,32 +16,33 @@ module Utility.FileMode ( import System.IO import Control.Monad import System.PosixCompat.Types -import System.PosixCompat.Files +import System.PosixCompat.Files (unionFileModes, intersectFileModes, stdFileMode, nullFileMode, groupReadMode, ownerReadMode, ownerWriteMode, ownerExecuteMode, groupWriteMode, groupExecuteMode, otherReadMode, otherWriteMode, otherExecuteMode, fileMode) #ifndef mingw32_HOST_OS -import System.Posix.Files (symbolicLinkMode) -import Control.Monad.IO.Class (liftIO) +import System.PosixCompat.Files (setFileCreationMask) #endif -import Control.Monad.IO.Class (MonadIO) +import Control.Monad.IO.Class import Foreign (complement) import Control.Monad.Catch import Utility.Exception +import Utility.FileSystemEncoding +import qualified Utility.RawFilePath as R {- Applies a conversion function to a file's mode. -} -modifyFileMode :: FilePath -> (FileMode -> FileMode) -> IO () +modifyFileMode :: RawFilePath -> (FileMode -> FileMode) -> IO () modifyFileMode f convert = void $ modifyFileMode' f convert -modifyFileMode' :: FilePath -> (FileMode -> FileMode) -> IO FileMode +modifyFileMode' :: RawFilePath -> (FileMode -> FileMode) -> IO FileMode modifyFileMode' f convert = do - s <- getFileStatus f + s <- R.getFileStatus f let old = fileMode s let new = convert old when (new /= old) $ - setFileMode f new + R.setFileMode f new return old {- Runs an action after changing a file's mode, then restores the old mode. -} -withModifiedFileMode :: FilePath -> (FileMode -> FileMode) -> IO a -> IO a +withModifiedFileMode :: RawFilePath -> (FileMode -> FileMode) -> IO a -> IO a withModifiedFileMode file convert a = bracket setup cleanup go where setup = modifyFileMode' file convert @@ -73,15 +75,15 @@ otherGroupModes = ] {- Removes the write bits from a file. -} -preventWrite :: FilePath -> IO () +preventWrite :: RawFilePath -> IO () preventWrite f = modifyFileMode f $ removeModes writeModes {- Turns a file's owner write bit back on. -} -allowWrite :: FilePath -> IO () +allowWrite :: RawFilePath -> IO () allowWrite f = modifyFileMode f $ addModes [ownerWriteMode] {- Turns a file's owner read bit back on. -} -allowRead :: FilePath -> IO () +allowRead :: RawFilePath -> IO () allowRead f = modifyFileMode f $ addModes [ownerReadMode] {- Allows owner and group to read and write to a file. -} @@ -91,34 +93,29 @@ groupSharedModes = , ownerReadMode, groupReadMode ] -groupWriteRead :: FilePath -> IO () +groupWriteRead :: RawFilePath -> IO () groupWriteRead f = modifyFileMode f $ addModes groupSharedModes checkMode :: FileMode -> FileMode -> Bool checkMode checkfor mode = checkfor `intersectFileModes` mode == checkfor -{- Checks if a file mode indicates it's a symlink. -} -isSymLink :: FileMode -> Bool -#ifdef mingw32_HOST_OS -isSymLink _ = False -#else -isSymLink = checkMode symbolicLinkMode -#endif - {- Checks if a file has any executable bits set. -} isExecutable :: FileMode -> Bool isExecutable mode = combineModes executeModes `intersectFileModes` mode /= 0 -{- Runs an action without that pesky umask influencing it, unless the - - passed FileMode is the standard one. -} -noUmask :: (MonadIO m, MonadMask m) => FileMode -> m a -> m a -#ifndef mingw32_HOST_OS -noUmask mode a - | mode == stdFileMode = a - | otherwise = withUmask nullFileMode a -#else -noUmask _ a = a -#endif +data ModeSetter = ModeSetter FileMode (RawFilePath -> IO ()) + +{- Runs an action which should create the file, passing it the desired + - initial file mode. Then runs the ModeSetter's action on the file, which + - can adjust the initial mode if umask prevented the file from being + - created with the right mode. -} +applyModeSetter :: Maybe ModeSetter -> RawFilePath -> (Maybe FileMode -> IO a) -> IO a +applyModeSetter (Just (ModeSetter mode modeaction)) file a = do + r <- a (Just mode) + void $ tryIO $ modeaction file + return r +applyModeSetter Nothing _ a = + a Nothing withUmask :: (MonadIO m, MonadMask m) => FileMode -> m a -> m a #ifndef mingw32_HOST_OS @@ -160,7 +157,7 @@ isSticky = checkMode stickyMode stickyMode :: FileMode stickyMode = 512 -setSticky :: FilePath -> IO () +setSticky :: RawFilePath -> IO () setSticky f = modifyFileMode f $ addModes [stickyMode] #endif @@ -173,15 +170,15 @@ setSticky f = modifyFileMode f $ addModes [stickyMode] - On a filesystem that does not support file permissions, this is the same - as writeFile. -} -writeFileProtected :: FilePath -> String -> IO () +writeFileProtected :: RawFilePath -> String -> IO () writeFileProtected file content = writeFileProtected' file (\h -> hPutStr h content) -writeFileProtected' :: FilePath -> (Handle -> IO ()) -> IO () -writeFileProtected' file writer = protectedOutput $ - withFile file WriteMode $ \h -> do - void $ tryIO $ modifyFileMode file $ removeModes otherGroupModes - writer h +writeFileProtected' :: RawFilePath -> (Handle -> IO ()) -> IO () +writeFileProtected' file writer = do + h <- protectedOutput $ openFile (fromRawFilePath file) WriteMode + void $ tryIO $ modifyFileMode file $ removeModes otherGroupModes + writer h protectedOutput :: IO a -> IO a protectedOutput = withUmask 0o0077 diff --git a/Utility/FileSize.hs b/Utility/FileSize.hs index 8544ad4..3d216f2 100644 --- a/Utility/FileSize.hs +++ b/Utility/FileSize.hs @@ -1,5 +1,7 @@ {- File size. - + - Copyright 2015-2020 Joey Hess <id@joeyh.name> + - - License: BSD-2-clause -} @@ -12,11 +14,15 @@ module Utility.FileSize ( getFileSize', ) where -import System.PosixCompat.Files #ifdef mingw32_HOST_OS import Control.Exception (bracket) import System.IO +import Utility.FileSystemEncoding +#else +import System.PosixCompat.Files (fileSize) #endif +import System.PosixCompat.Files (FileStatus) +import qualified Utility.RawFilePath as R type FileSize = Integer @@ -26,18 +32,18 @@ type FileSize = Integer - FileOffset which maxes out at 2 gb. - See https://github.com/jystic/unix-compat/issues/16 -} -getFileSize :: FilePath -> IO FileSize +getFileSize :: R.RawFilePath -> IO FileSize #ifndef mingw32_HOST_OS -getFileSize f = fmap (fromIntegral . fileSize) (getFileStatus f) +getFileSize f = fmap (fromIntegral . fileSize) (R.getFileStatus f) #else -getFileSize f = bracket (openFile f ReadMode) hClose hFileSize +getFileSize f = bracket (openFile (fromRawFilePath f) ReadMode) hClose hFileSize #endif {- Gets the size of the file, when its FileStatus is already known. - - On windows, uses getFileSize. Otherwise, the FileStatus contains the - size, so this does not do any work. -} -getFileSize' :: FilePath -> FileStatus -> IO FileSize +getFileSize' :: R.RawFilePath -> FileStatus -> IO FileSize #ifndef mingw32_HOST_OS getFileSize' _ s = return $ fromIntegral $ fileSize s #else diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs index f9e9814..2a1dc81 100644 --- a/Utility/FileSystemEncoding.hs +++ b/Utility/FileSystemEncoding.hs @@ -1,6 +1,6 @@ {- GHC File system encoding handling. - - - Copyright 2012-2016 Joey Hess <id@joeyh.name> + - Copyright 2012-2021 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} @@ -11,7 +11,6 @@ module Utility.FileSystemEncoding ( useFileSystemEncoding, fileEncoding, - withFilePath, RawFilePath, fromRawFilePath, toRawFilePath, @@ -19,34 +18,22 @@ module Utility.FileSystemEncoding ( encodeBL, decodeBS, encodeBS, - decodeBL', - encodeBL', - decodeBS', - encodeBS', truncateFilePath, - s2w8, - w82s, - c2w8, - w82c, ) where import qualified GHC.Foreign as GHC import qualified GHC.IO.Encoding as Encoding -import Foreign.C import System.IO import System.IO.Unsafe -import Data.Word -import Data.List +import System.FilePath.ByteString (RawFilePath, encodeFilePath, decodeFilePath) import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L +import Data.ByteString.Unsafe (unsafePackMallocCStringLen) #ifdef mingw32_HOST_OS import qualified Data.ByteString.UTF8 as S8 import qualified Data.ByteString.Lazy.UTF8 as L8 #endif -import Utility.Exception -import Utility.Split - {- Makes all subsequent Handles that are opened, as well as stdio Handles, - use the filesystem encoding, instead of the encoding of the current - locale. @@ -79,40 +66,10 @@ fileEncoding h = hSetEncoding h =<< Encoding.getFileSystemEncoding fileEncoding h = hSetEncoding h Encoding.utf8 #endif -{- Marshal a Haskell FilePath into a NUL terminated C string using temporary - - storage. The FilePath is encoded using the filesystem encoding, - - reversing the decoding that should have been done when the FilePath - - was obtained. -} -withFilePath :: FilePath -> (CString -> IO a) -> IO a -withFilePath fp f = Encoding.getFileSystemEncoding - >>= \enc -> GHC.withCString enc fp f - -{- Encodes a FilePath into a String, applying the filesystem encoding. - - - - There are very few things it makes sense to do with such an encoded - - string. It's not a legal filename; it should not be displayed. - - So this function is not exported, but instead used by the few functions - - that can usefully consume it. - - - - This use of unsafePerformIO is belived to be safe; GHC's interface - - only allows doing this conversion with CStrings, and the CString buffer - - is allocated, used, and deallocated within the call, with no side - - effects. - - - - If the FilePath contains a value that is not legal in the filesystem - - encoding, rather than thowing an exception, it will be returned as-is. - -} -{-# NOINLINE _encodeFilePath #-} -_encodeFilePath :: FilePath -> String -_encodeFilePath fp = unsafePerformIO $ do - enc <- Encoding.getFileSystemEncoding - GHC.withCString enc fp (GHC.peekCString Encoding.char8) - `catchNonAsync` (\_ -> return fp) - {- Decodes a ByteString into a FilePath, applying the filesystem encoding. -} decodeBL :: L.ByteString -> FilePath #ifndef mingw32_HOST_OS -decodeBL = encodeW8NUL . L.unpack +decodeBL = decodeBS . L.toStrict #else {- On Windows, we assume that the ByteString is utf-8, since Windows - only uses unicode for filenames. -} @@ -122,111 +79,44 @@ decodeBL = L8.toString {- Encodes a FilePath into a ByteString, applying the filesystem encoding. -} encodeBL :: FilePath -> L.ByteString #ifndef mingw32_HOST_OS -encodeBL = L.pack . decodeW8NUL +encodeBL = L.fromStrict . encodeBS #else encodeBL = L8.fromString #endif decodeBS :: S.ByteString -> FilePath #ifndef mingw32_HOST_OS -decodeBS = encodeW8NUL . S.unpack +-- This does the same thing as System.FilePath.ByteString.decodeFilePath, +-- with an identical implementation. However, older versions of that library +-- truncated at NUL, which this must not do, because it may end up used on +-- something other than a unix filepath. +{-# NOINLINE decodeBS #-} +decodeBS b = unsafePerformIO $ do + enc <- Encoding.getFileSystemEncoding + S.useAsCStringLen b (GHC.peekCStringLen enc) #else decodeBS = S8.toString #endif encodeBS :: FilePath -> S.ByteString #ifndef mingw32_HOST_OS -encodeBS = S.pack . decodeW8NUL +-- This does the same thing as System.FilePath.ByteString.encodeFilePath, +-- with an identical implementation. However, older versions of that library +-- truncated at NUL, which this must not do, because it may end up used on +-- something other than a unix filepath. +{-# NOINLINE encodeBS #-} +encodeBS f = unsafePerformIO $ do + enc <- Encoding.getFileSystemEncoding + GHC.newCStringLen enc f >>= unsafePackMallocCStringLen #else encodeBS = S8.fromString #endif -{- Faster version that assumes the string does not contain NUL; - - if it does it will be truncated before the NUL. -} -decodeBS' :: S.ByteString -> FilePath -#ifndef mingw32_HOST_OS -decodeBS' = encodeW8 . S.unpack -#else -decodeBS' = S8.toString -#endif - -encodeBS' :: FilePath -> S.ByteString -#ifndef mingw32_HOST_OS -encodeBS' = S.pack . decodeW8 -#else -encodeBS' = S8.fromString -#endif - -decodeBL' :: L.ByteString -> FilePath -#ifndef mingw32_HOST_OS -decodeBL' = encodeW8 . L.unpack -#else -decodeBL' = L8.toString -#endif - -encodeBL' :: FilePath -> L.ByteString -#ifndef mingw32_HOST_OS -encodeBL' = L.pack . decodeW8 -#else -encodeBL' = L8.fromString -#endif - -{- Recent versions of the unix package have this alias; defined here - - for backwards compatibility. -} -type RawFilePath = S.ByteString - -{- Note that the RawFilePath is assumed to never contain NUL, - - since filename's don't. This should only be used with actual - - RawFilePaths not arbitrary ByteString that may contain NUL. -} fromRawFilePath :: RawFilePath -> FilePath -fromRawFilePath = decodeBS' +fromRawFilePath = decodeFilePath -{- Note that the FilePath is assumed to never contain NUL, - - since filename's don't. This should only be used with actual FilePaths - - not arbitrary String that may contain NUL. -} toRawFilePath :: FilePath -> RawFilePath -toRawFilePath = encodeBS' - -{- Converts a [Word8] to a FilePath, encoding using the filesystem encoding. - - - - w82s produces a String, which may contain Chars that are invalid - - unicode. From there, this is really a simple matter of applying the - - file system encoding, only complicated by GHC's interface to doing so. - - - - Note that the encoding stops at any NUL in the input. FilePaths - - cannot contain embedded NUL, but Haskell Strings may. - -} -{-# NOINLINE encodeW8 #-} -encodeW8 :: [Word8] -> FilePath -encodeW8 w8 = unsafePerformIO $ do - enc <- Encoding.getFileSystemEncoding - GHC.withCString Encoding.char8 (w82s w8) $ GHC.peekCString enc - -decodeW8 :: FilePath -> [Word8] -decodeW8 = s2w8 . _encodeFilePath - -{- Like encodeW8 and decodeW8, but NULs are passed through unchanged. -} -encodeW8NUL :: [Word8] -> FilePath -encodeW8NUL = intercalate [nul] . map encodeW8 . splitc (c2w8 nul) - where - nul = '\NUL' - -decodeW8NUL :: FilePath -> [Word8] -decodeW8NUL = intercalate [c2w8 nul] . map decodeW8 . splitc nul - where - nul = '\NUL' - -c2w8 :: Char -> Word8 -c2w8 = fromIntegral . fromEnum - -w82c :: Word8 -> Char -w82c = toEnum . fromIntegral - -s2w8 :: String -> [Word8] -s2w8 = map c2w8 - -w82s :: [Word8] -> String -w82s = map w82c +toRawFilePath = encodeFilePath {- Truncates a FilePath to the given number of bytes (or less), - as represented on disk. @@ -239,8 +129,8 @@ truncateFilePath :: Int -> FilePath -> FilePath truncateFilePath n = go . reverse where go f = - let bytes = decodeW8 f - in if length bytes <= n + let b = encodeBS f + in if S.length b <= n then reverse f else go (drop 1 f) #else diff --git a/Utility/Format.hs b/Utility/Format.hs index a2470fa..930b7ee 100644 --- a/Utility/Format.hs +++ b/Utility/Format.hs @@ -1,6 +1,6 @@ {- Formatted string handling. - - - Copyright 2010, 2011 Joey Hess <id@joeyh.name> + - Copyright 2010-2023 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} @@ -9,8 +9,12 @@ module Utility.Format ( Format, gen, format, + escapedFormat, + formatContainsVar, decode_c, encode_c, + encode_c', + isUtf8Byte, prop_encode_c_decode_c_roundtrip ) where @@ -19,19 +23,23 @@ import Data.Char (isAlphaNum, isOctDigit, isHexDigit, isSpace, chr, ord, isAscii import Data.Maybe (fromMaybe) import Data.Word (Word8) import Data.List (isPrefixOf) -import qualified Codec.Binary.UTF8.String import qualified Data.Map as M +import qualified Data.ByteString as S import Utility.PartialPrelude - -type FormatString = String +import Utility.FileSystemEncoding {- A format consists of a list of fragments. -} type Format = [Frag] -{- A fragment is either a constant string, - - or a variable, with a justification. -} -data Frag = Const String | Var String Justify +{- A fragment is either a constant string, or a variable. -} +data Frag + = Const String + | Var + { varName :: String + , varJustify :: Justify + , varEscaped :: Bool + } deriving (Show) data Justify = LeftJustified Int | RightJustified Int | UnJustified @@ -45,10 +53,9 @@ format :: Format -> Variables -> String format f vars = concatMap expand f where expand (Const s) = s - expand (Var name j) - | "escaped_" `isPrefixOf` name = - justify j $ encode_c_strict $ - getvar $ drop (length "escaped_") name + expand (Var name j esc) + | esc = justify j $ decodeBS $ escapedFormat $ + encodeBS $ getvar name | otherwise = justify j $ getvar name getvar name = fromMaybe "" $ M.lookup name vars justify UnJustified s = s @@ -57,13 +64,22 @@ format f vars = concatMap expand f pad i s = take (i - length s) spaces spaces = repeat ' ' +escapedFormat :: S.ByteString -> S.ByteString +escapedFormat = encode_c needescape + where + needescape c = isUtf8Byte c || + isSpace (chr (fromIntegral c)) || + c == fromIntegral (ord '"') + {- Generates a Format that can be used to expand variables in a - format string, such as "${foo} ${bar;10} ${baz;-10}\n" - - (This is the same type of format string used by dpkg-query.) + - + - Also, "${escaped_foo}" will apply encode_c to the value of variable foo. -} -gen :: FormatString -> Format -gen = filter (not . empty) . fuse [] . scan [] . decode_c +gen :: String -> Format +gen = filter (not . empty) . fuse [] . scan [] . decodeBS . decode_c . encodeBS where -- The Format is built up in reverse, for efficiency, -- and can have many adjacent Consts. Fusing it fixes both @@ -94,42 +110,71 @@ gen = filter (not . empty) . fuse [] . scan [] . decode_c | i < 0 = LeftJustified (-1 * i) | otherwise = RightJustified i novar v = "${" ++ reverse v - foundvar f v p = scan (Var (reverse v) p : f) + foundvar f varname_r p = + let varname = reverse varname_r + var = if "escaped_" `isPrefixOf` varname + then Var (drop (length "escaped_") varname) p True + else Var varname p False + in scan (var : f) empty :: Frag -> Bool empty (Const "") = True empty _ = False +{- Check if a Format contains a variable with a specified name. -} +formatContainsVar :: String -> Format -> Bool +formatContainsVar v = any go + where + go (Var v' _ _) | v' == v = True + go _ = False + {- Decodes a C-style encoding, where \n is a newline (etc), - \NNN is an octal encoded character, and \xNN is a hex encoded character. -} -decode_c :: FormatString -> String -decode_c [] = [] -decode_c s = unescape ("", s) +decode_c :: S.ByteString -> S.ByteString +decode_c s + | S.null s = S.empty + | otherwise = unescape (S.empty, s) where - e = '\\' - unescape (b, []) = b - -- look for escapes starting with '\' - unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair) + e = fromIntegral (ord '\\') + x = fromIntegral (ord 'x') + isescape c = c == e + unescape (b, v) + | S.null v = b + | otherwise = b <> fst pair <> unescape (handle $ snd pair) where - pair = span (/= e) v - isescape x = x == e - handle (x:'x':n1:n2:rest) - | isescape x && allhex = (fromhex, rest) + pair = S.span (not . isescape) v + handle b + | S.length b >= 1 && isescape (S.index b 0) = handle' b + | otherwise = (S.empty, b) + + handle' b + | S.length b >= 4 + && S.index b 1 == x + && allhex = (fromhex, rest) where + n1 = chr (fromIntegral (S.index b 2)) + n2 = chr (fromIntegral (S.index b 3)) + rest = S.drop 4 b allhex = isHexDigit n1 && isHexDigit n2 - fromhex = [chr $ readhex [n1, n2]] + fromhex = encodeBS [chr $ readhex [n1, n2]] readhex h = Prelude.read $ "0x" ++ h :: Int - handle (x:n1:n2:n3:rest) - | isescape x && alloctal = (fromoctal, rest) + handle' b + | S.length b >= 4 && alloctal = (fromoctal, rest) where + n1 = chr (fromIntegral (S.index b 1)) + n2 = chr (fromIntegral (S.index b 2)) + n3 = chr (fromIntegral (S.index b 3)) + rest = S.drop 4 b alloctal = isOctDigit n1 && isOctDigit n2 && isOctDigit n3 - fromoctal = [chr $ readoctal [n1, n2, n3]] + fromoctal = encodeBS [chr $ readoctal [n1, n2, n3]] readoctal o = Prelude.read $ "0o" ++ o :: Int - -- \C is used for a few special characters - handle (x:nc:rest) - | isescape x = ([echar nc], rest) + handle' b + | S.length b >= 2 = + (S.singleton (fromIntegral (ord (echar nc))), rest) where + nc = chr (fromIntegral (S.index b 1)) + rest = S.drop 2 b echar 'a' = '\a' echar 'b' = '\b' echar 'f' = '\f' @@ -137,41 +182,50 @@ decode_c s = unescape ("", s) echar 'r' = '\r' echar 't' = '\t' echar 'v' = '\v' - echar a = a - handle n = ("", n) + echar a = a -- \\ decodes to '\', and \" to '"' + handle' b = (S.empty, b) -{- Inverse of decode_c. -} -encode_c :: String -> FormatString -encode_c = encode_c' (const False) - -{- Encodes more strictly, including whitespace. -} -encode_c_strict :: String -> FormatString -encode_c_strict = encode_c' isSpace - -encode_c' :: (Char -> Bool) -> String -> FormatString -encode_c' p = concatMap echar +{- Inverse of decode_c. Encodes ascii control characters as well as + - bytes that match the predicate. (And also '\' itself.) + -} +encode_c :: (Word8 -> Bool) -> S.ByteString -> S.ByteString +encode_c p s = fromMaybe s (encode_c' p s) + +{- Returns Nothing when nothing needs to be escaped in the input ByteString. -} +encode_c' :: (Word8 -> Bool) -> S.ByteString -> Maybe S.ByteString +encode_c' p s + | S.any needencode s = Just (S.concatMap echar s) + | otherwise = Nothing where - e c = '\\' : [c] - echar '\a' = e 'a' - echar '\b' = e 'b' - echar '\f' = e 'f' - echar '\n' = e 'n' - echar '\r' = e 'r' - echar '\t' = e 't' - echar '\v' = e 'v' - echar '\\' = e '\\' - echar '"' = e '"' + e = fromIntegral (ord '\\') + q = fromIntegral (ord '"') + del = 0x7F + iscontrol c = c < 0x20 + + echar 0x7 = ec 'a' + echar 0x8 = ec 'b' + echar 0x0C = ec 'f' + echar 0x0A = ec 'n' + echar 0x0D = ec 'r' + echar 0x09 = ec 't' + echar 0x0B = ec 'v' echar c - | ord c < 0x20 = e_asc c -- low ascii - | ord c >= 256 = e_utf c -- unicode - | ord c > 0x7E = e_asc c -- high ascii - | p c = e_asc c -- unprintable ascii - | otherwise = [c] -- printable ascii - -- unicode character is decomposed to individual Word8s, - -- and each is shown in octal - e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8]) - e_asc c = showoctal $ ord c - showoctal i = '\\' : printf "%03o" i + | iscontrol c = showoctal c -- other control characters + | c == e = ec '\\' -- escape the escape character itself + | c == del = showoctal c + | p c = if c == q + then ec '"' -- escape double quote + else showoctal c + | otherwise = S.singleton c + + needencode c = iscontrol c || c == e || c == del || p c + + ec c = S.pack [e, fromIntegral (ord c)] + + showoctal i = encodeBS ('\\' : printf "%03o" i) + +isUtf8Byte :: Word8 -> Bool +isUtf8Byte c = c >= 0x80 {- For quickcheck. - @@ -182,6 +236,7 @@ encode_c' p = concatMap echar - This property papers over the problem, by only testing ascii. -} prop_encode_c_decode_c_roundtrip :: String -> Bool -prop_encode_c_decode_c_roundtrip s = s' == decode_c (encode_c s') +prop_encode_c_decode_c_roundtrip s = s' == + decodeBS (decode_c (encode_c isUtf8Byte (encodeBS s'))) where s' = filter isAscii s diff --git a/Utility/HumanNumber.hs b/Utility/HumanNumber.hs index 6143cef..04a18b0 100644 --- a/Utility/HumanNumber.hs +++ b/Utility/HumanNumber.hs @@ -1,6 +1,6 @@ {- numbers for humans - - - Copyright 2012-2013 Joey Hess <id@joeyh.name> + - Copyright 2012-2021 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} @@ -11,11 +11,15 @@ module Utility.HumanNumber (showImprecise) where - of decimal digits. -} showImprecise :: RealFrac a => Int -> a -> String showImprecise precision n - | precision == 0 || remainder == 0 = show (round n :: Integer) - | otherwise = show int ++ "." ++ striptrailing0s (pad0s $ show remainder) + | precision == 0 || remainder' == 0 = show (round n :: Integer) + | otherwise = show int' ++ "." ++ striptrailing0s (pad0s $ show remainder') where int :: Integer (int, frac) = properFraction n remainder = round (frac * 10 ^ precision) :: Integer + (int', remainder') + -- carry the 1 + | remainder == 10 ^ precision = (int + 1, 0) + | otherwise = (int, remainder) pad0s s = replicate (precision - length s) '0' ++ s striptrailing0s = reverse . dropWhile (== '0') . reverse diff --git a/Utility/HumanTime.hs b/Utility/HumanTime.hs index 51338b3..5178531 100644 --- a/Utility/HumanTime.hs +++ b/Utility/HumanTime.hs @@ -44,8 +44,10 @@ daysToDuration :: Integer -> Duration daysToDuration i = Duration $ i * dsecs {- Parses a human-input time duration, of the form "5h", "1m", "5h1m", etc -} -parseDuration :: MonadFail m => String -> m Duration -parseDuration = maybe parsefail (return . Duration) . go 0 +parseDuration :: String -> Either String Duration +parseDuration d + | null d = parsefail + | otherwise = maybe parsefail (Right . Duration) $ go 0 d where go n [] = return n go n s = do @@ -55,7 +57,7 @@ parseDuration = maybe parsefail (return . Duration) . go 0 u <- M.lookup c unitmap go (n + num * u) rest _ -> return $ n + num - parsefail = fail "duration parse error; expected eg \"5m\" or \"1h5m\"" + parsefail = Left $ "failed to parse duration \"" ++ d ++ "\" (expected eg \"5m\" or \"1h5m\")" fromDuration :: Duration -> String fromDuration Duration { durationSeconds = d } @@ -101,4 +103,4 @@ instance Arbitrary Duration where arbitrary = Duration <$> nonNegative arbitrary prop_duration_roundtrips :: Duration -> Bool -prop_duration_roundtrips d = parseDuration (fromDuration d) == Just d +prop_duration_roundtrips d = parseDuration (fromDuration d) == Right d diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs new file mode 100644 index 0000000..3828bc6 --- /dev/null +++ b/Utility/InodeCache.hs @@ -0,0 +1,310 @@ +{- Caching a file's inode, size, and modification time + - to see when it's changed. + - + - Copyright 2013-2019 Joey Hess <id@joeyh.name> + - + - License: BSD-2-clause + -} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE TypeSynonymInstances #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Utility.InodeCache ( + InodeCache, + mkInodeCache, + InodeComparisonType(..), + inodeCacheFileSize, + + compareStrong, + compareWeak, + compareBy, + + readInodeCache, + showInodeCache, + genInodeCache, + toInodeCache, + toInodeCache', + + InodeCacheKey, + inodeCacheToKey, + inodeCacheToFileSize, + inodeCacheToMtime, + inodeCacheToEpochTime, + inodeCacheEpochTimeRange, + replaceInode, + + SentinalFile(..), + SentinalStatus(..), + TSDelta, + noTSDelta, + writeSentinalFile, + checkSentinalFile, + sentinalFileExists, + + prop_read_show_inodecache +) where + +import Common +import Utility.TimeStamp +import Utility.QuickCheck +import qualified Utility.RawFilePath as R + +import System.PosixCompat.Types +import System.PosixCompat.Files (isRegularFile, fileID) +import Data.Time.Clock.POSIX + +#ifndef mingw32_HOST_OS +import qualified System.Posix.Files as Posix +#endif + +data InodeCachePrim = InodeCachePrim FileID FileSize MTime + deriving (Show, Eq, Ord) + +newtype InodeCache = InodeCache InodeCachePrim + deriving (Show) + +mkInodeCache :: FileID -> FileSize -> POSIXTime -> InodeCache +mkInodeCache inode sz mtime = InodeCache $ + InodeCachePrim inode sz (MTimeHighRes mtime) + +inodeCacheFileSize :: InodeCache -> FileSize +inodeCacheFileSize (InodeCache (InodeCachePrim _ sz _)) = sz + +{- Inode caches can be compared in two different ways, either weakly + - or strongly. -} +data InodeComparisonType = Weakly | Strongly + deriving (Eq, Ord, Show) + +{- Strong comparison, including inodes. -} +compareStrong :: InodeCache -> InodeCache -> Bool +compareStrong (InodeCache x) (InodeCache y) = x == y + +{- Weak comparison of the inode caches, comparing the size and mtime, + - but not the actual inode. Useful when inodes have changed, perhaps + - due to some filesystems being remounted. + - + - The weak mtime comparison treats any mtimes that are within 2 seconds + - of one-another as the same. This is because FAT has only a 2 second + - resolution. When a FAT filesystem is used on Linux, higher resolution + - timestamps maybe are cached and used by Linux, but they are lost + - on unmount, so after a remount, the timestamp can appear to have changed. + -} +compareWeak :: InodeCache -> InodeCache -> Bool +compareWeak (InodeCache (InodeCachePrim _ size1 mtime1)) (InodeCache (InodeCachePrim _ size2 mtime2)) = + size1 == size2 && (abs (lowResTime mtime1 - lowResTime mtime2) < 2) + +compareBy :: InodeComparisonType -> InodeCache -> InodeCache -> Bool +compareBy Strongly = compareStrong +compareBy Weakly = compareWeak + +{- For use in a Map; it's determined at creation time whether this + - uses strong or weak comparison for Eq. -} +data InodeCacheKey = InodeCacheKey InodeComparisonType InodeCachePrim + deriving (Ord, Show) + +instance Eq InodeCacheKey where + (InodeCacheKey ctx x) == (InodeCacheKey cty y) = + compareBy (maximum [ctx,cty]) (InodeCache x ) (InodeCache y) + +inodeCacheToKey :: InodeComparisonType -> InodeCache -> InodeCacheKey +inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim + +inodeCacheToFileSize :: InodeCache -> FileSize +inodeCacheToFileSize (InodeCache (InodeCachePrim _ sz _)) = sz + +inodeCacheToMtime :: InodeCache -> POSIXTime +inodeCacheToMtime (InodeCache (InodeCachePrim _ _ mtime)) = highResTime mtime + +inodeCacheToEpochTime :: InodeCache -> EpochTime +inodeCacheToEpochTime (InodeCache (InodeCachePrim _ _ mtime)) = lowResTime mtime + +-- Returns min, max EpochTime that weakly match the time of the InodeCache. +inodeCacheEpochTimeRange :: InodeCache -> (EpochTime, EpochTime) +inodeCacheEpochTimeRange i = + let t = inodeCacheToEpochTime i + in (t-1, t+1) + +replaceInode :: FileID -> InodeCache -> InodeCache +replaceInode inode (InodeCache (InodeCachePrim _ sz mtime)) = + InodeCache (InodeCachePrim inode sz mtime) + +{- For backwards compatibility, support low-res mtime with no + - fractional seconds. -} +data MTime = MTimeLowRes EpochTime | MTimeHighRes POSIXTime + deriving (Show, Ord) + +{- A low-res time compares equal to any high-res time in the same second. -} +instance Eq MTime where + MTimeLowRes a == MTimeLowRes b = a == b + MTimeHighRes a == MTimeHighRes b = a == b + MTimeHighRes a == MTimeLowRes b = lowResTime a == b + MTimeLowRes a == MTimeHighRes b = a == lowResTime b + +class MultiResTime t where + lowResTime :: t -> EpochTime + highResTime :: t -> POSIXTime + +instance MultiResTime EpochTime where + lowResTime = id + highResTime = realToFrac + +instance MultiResTime POSIXTime where + lowResTime = fromInteger . floor + highResTime = id + +instance MultiResTime MTime where + lowResTime (MTimeLowRes t) = t + lowResTime (MTimeHighRes t) = lowResTime t + highResTime (MTimeLowRes t) = highResTime t + highResTime (MTimeHighRes t) = t + +showInodeCache :: InodeCache -> String +showInodeCache (InodeCache (InodeCachePrim inode size (MTimeHighRes mtime))) = + let (t, d) = separate (== '.') (takeWhile (/= 's') (show mtime)) + in unwords + [ show inode + , show size + , t + , d + ] +showInodeCache (InodeCache (InodeCachePrim inode size (MTimeLowRes mtime))) = + unwords + [ show inode + , show size + , show mtime + ] + +readInodeCache :: String -> Maybe InodeCache +readInodeCache s = case words s of + (inode:size:mtime:[]) -> do + i <- readish inode + sz <- readish size + t <- readish mtime + return $ InodeCache $ InodeCachePrim i sz (MTimeLowRes t) + (inode:size:mtime:mtimedecimal:_) -> do + i <- readish inode + sz <- readish size + t <- parsePOSIXTime $ mtime ++ '.' : mtimedecimal + return $ InodeCache $ InodeCachePrim i sz (MTimeHighRes t) + _ -> Nothing + +genInodeCache :: RawFilePath -> TSDelta -> IO (Maybe InodeCache) +genInodeCache f delta = catchDefaultIO Nothing $ + toInodeCache delta f =<< R.getSymbolicLinkStatus f + +toInodeCache :: TSDelta -> RawFilePath -> FileStatus -> IO (Maybe InodeCache) +toInodeCache d f s = toInodeCache' d f s (fileID s) + +toInodeCache' :: TSDelta -> RawFilePath -> FileStatus -> FileID -> IO (Maybe InodeCache) +toInodeCache' (TSDelta getdelta) f s inode + | isRegularFile s = do + delta <- getdelta + sz <- getFileSize' f s +#ifdef mingw32_HOST_OS + mtime <- utcTimeToPOSIXSeconds <$> getModificationTime (fromRawFilePath f) +#else + let mtime = Posix.modificationTimeHiRes s +#endif + return $ Just $ InodeCache $ InodeCachePrim inode sz (MTimeHighRes (mtime + highResTime delta)) + | otherwise = pure Nothing + +{- Some filesystem get new random inodes each time they are mounted. + - To detect this and other problems, a sentinal file can be created. + - Its InodeCache at the time of its creation is written to the cache file, + - so changes can later be detected. -} +data SentinalFile = SentinalFile + { sentinalFile :: RawFilePath + , sentinalCacheFile :: RawFilePath + } + deriving (Show) + +{- On Windows, the mtime of a file appears to change when the time zone is + - changed. To deal with this, a TSDelta can be used; the delta is added to + - the mtime when generating an InodeCache. The current delta can be found + - by looking at the SentinalFile. Effectively, this makes all InodeCaches + - use the same time zone that was in use when the sential file was + - originally written. -} +newtype TSDelta = TSDelta (IO EpochTime) + +noTSDelta :: TSDelta +noTSDelta = TSDelta (pure 0) + +writeSentinalFile :: SentinalFile -> IO () +writeSentinalFile s = do + writeFile (fromRawFilePath (sentinalFile s)) "" + maybe noop (writeFile (fromRawFilePath (sentinalCacheFile s)) . showInodeCache) + =<< genInodeCache (sentinalFile s) noTSDelta + +data SentinalStatus = SentinalStatus + { sentinalInodesChanged :: Bool + , sentinalTSDelta :: TSDelta + } + +{- Checks if the InodeCache of the sentinal file is the same + - as it was when it was originally created. + - + - On Windows, time stamp differences are ignored, since they change + - with the timezone. + - + - When the sential file does not exist, InodeCaches cannot reliably be + - compared, so the assumption is that there is has been a change. + -} +checkSentinalFile :: SentinalFile -> IO SentinalStatus +checkSentinalFile s = do + mold <- loadoldcache + case mold of + Nothing -> return dummy + (Just old) -> do + mnew <- gennewcache + case mnew of + Nothing -> return dummy + Just new -> return $ calc old new + where + loadoldcache = catchDefaultIO Nothing $ + readInodeCache <$> readFile (fromRawFilePath (sentinalCacheFile s)) + gennewcache = genInodeCache (sentinalFile s) noTSDelta + calc (InodeCache (InodeCachePrim oldinode oldsize oldmtime)) (InodeCache (InodeCachePrim newinode newsize newmtime)) = + SentinalStatus (not unchanged) tsdelta + where +#ifdef mingw32_HOST_OS + -- Since mtime can appear to change when the time zone is + -- changed in windows, we cannot look at the mtime for the + -- sentinal file. + unchanged = oldinode == newinode && oldsize == newsize && (newmtime == newmtime) + tsdelta = TSDelta $ do + -- Run when generating an InodeCache, + -- to get the current delta. + mnew <- gennewcache + return $ case mnew of + Just (InodeCache (InodeCachePrim _ _ currmtime)) -> + lowResTime oldmtime - lowResTime currmtime + Nothing -> 0 +#else + unchanged = oldinode == newinode && oldsize == newsize && oldmtime == newmtime + tsdelta = noTSDelta +#endif + dummy = SentinalStatus True noTSDelta + +sentinalFileExists :: SentinalFile -> IO Bool +sentinalFileExists s = allM R.doesPathExist [sentinalCacheFile s, sentinalFile s] + +instance Arbitrary InodeCache where + arbitrary = + let prim = InodeCachePrim + <$> arbitrary + <*> arbitrary + <*> arbitrary + in InodeCache <$> prim + +instance Arbitrary MTime where + arbitrary = frequency + -- timestamp is not usually negative + [ (50, MTimeLowRes <$> (abs . fromInteger <$> arbitrary)) + , (50, MTimeHighRes <$> arbitrary) + ] + +prop_read_show_inodecache :: InodeCache -> Bool +prop_read_show_inodecache c = case readInodeCache (showInodeCache c) of + Nothing -> False + Just c' -> compareStrong c c' diff --git a/Utility/Metered.hs b/Utility/Metered.hs index ec16e33..a8a7111 100644 --- a/Utility/Metered.hs +++ b/Utility/Metered.hs @@ -1,6 +1,6 @@ {- Metered IO and actions - - - Copyright 2012-2018 Joey Hess <id@joeyh.name> + - Copyright 2012-2021 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} @@ -9,8 +9,10 @@ module Utility.Metered ( MeterUpdate, + MeterState(..), nullMeterUpdate, combineMeterUpdate, + TotalSize(..), BytesProcessed(..), toBytesProcessed, fromBytesProcessed, @@ -29,10 +31,13 @@ module Utility.Metered ( ProgressParser, commandMeter, commandMeter', + commandMeterExitCode, + commandMeterExitCode', demeterCommand, demeterCommandEnv, avoidProgress, rateLimitMeterUpdate, + bwLimitMeterUpdate, Meter, mkMeter, setMeterTotalSize, @@ -46,6 +51,9 @@ import Common import Utility.Percentage import Utility.DataUnits import Utility.HumanTime +import Utility.SimpleProtocol as Proto +import Utility.ThreadScheduler +import Utility.SafeOutput import qualified Data.ByteString.Lazy as L import qualified Data.ByteString as S @@ -73,7 +81,7 @@ combineMeterUpdate a b = \n -> a n >> b n {- Total number of bytes processed so far. -} newtype BytesProcessed = BytesProcessed Integer - deriving (Eq, Ord, Show) + deriving (Eq, Ord, Show, Read) class AsBytesProcessed a where toBytesProcessed :: a -> BytesProcessed @@ -113,23 +121,24 @@ withMeteredFile :: FilePath -> MeterUpdate -> (L.ByteString -> IO a) -> IO a withMeteredFile f meterupdate a = withBinaryFile f ReadMode $ \h -> hGetContentsMetered h meterupdate >>= a -{- Writes a ByteString to a Handle, updating a meter as it's written. -} -meteredWrite :: MeterUpdate -> Handle -> L.ByteString -> IO () -meteredWrite meterupdate h = void . meteredWrite' meterupdate h +{- Calls the action repeatedly with chunks from the lazy ByteString. + - Updates the meter after each chunk is processed. -} +meteredWrite :: MeterUpdate -> (S.ByteString -> IO ()) -> L.ByteString -> IO () +meteredWrite meterupdate a = void . meteredWrite' meterupdate a -meteredWrite' :: MeterUpdate -> Handle -> L.ByteString -> IO BytesProcessed -meteredWrite' meterupdate h = go zeroBytesProcessed . L.toChunks +meteredWrite' :: MeterUpdate -> (S.ByteString -> IO ()) -> L.ByteString -> IO BytesProcessed +meteredWrite' meterupdate a = go zeroBytesProcessed . L.toChunks where go sofar [] = return sofar go sofar (c:cs) = do - S.hPut h c + a c let !sofar' = addBytesProcessed sofar $ S.length c meterupdate sofar' go sofar' cs meteredWriteFile :: MeterUpdate -> FilePath -> L.ByteString -> IO () meteredWriteFile meterupdate f b = withBinaryFile f WriteMode $ \h -> - meteredWrite meterupdate h b + meteredWrite meterupdate (S.hPut h) b {- Applies an offset to a MeterUpdate. This can be useful when - performing a sequence of actions, such as multiple meteredWriteFiles, @@ -165,8 +174,9 @@ hGetMetered h wantsize meterupdate = lazyRead zeroBytesProcessed c <- S.hGet h (nextchunksize (fromBytesProcessed sofar)) if S.null c then do - hClose h - return $ L.empty + when (wantsize /= Just 0) $ + hClose h + return L.empty else do let !sofar' = addBytesProcessed sofar (S.length c) meterupdate sofar' @@ -218,7 +228,8 @@ watchFileSize f p a = bracket p sz watcher sz getsz = catchDefaultIO zeroBytesProcessed $ - toBytesProcessed <$> getFileSize f + toBytesProcessed <$> getFileSize f' + f' = toRawFilePath f data OutputHandler = OutputHandler { quietMode :: Bool @@ -226,31 +237,45 @@ data OutputHandler = OutputHandler } {- Parses the String looking for a command's progress output, and returns - - Maybe the number of bytes done so far, and any any remainder of the - - string that could be an incomplete progress output. That remainder - - should be prepended to future output, and fed back in. This interface - - allows the command's output to be read in any desired size chunk, or - - even one character at a time. + - Maybe the number of bytes done so far, optionally a total size, + - and any any remainder of the string that could be an incomplete + - progress output. That remainder should be prepended to future output, + - and fed back in. This interface allows the command's output to be read + - in any desired size chunk, or even one character at a time. -} -type ProgressParser = String -> (Maybe BytesProcessed, String) +type ProgressParser = String -> (Maybe BytesProcessed, Maybe TotalSize, String) + +newtype TotalSize = TotalSize Integer + deriving (Show, Eq) {- Runs a command and runs a ProgressParser on its output, in order - to update a meter. + - + - If the Meter is provided, the ProgressParser can report the total size, + - which allows creating a Meter before the size is known. -} -commandMeter :: ProgressParser -> OutputHandler -> MeterUpdate -> FilePath -> [CommandParam] -> IO Bool -commandMeter progressparser oh meterupdate cmd params = do - ret <- commandMeter' progressparser oh meterupdate cmd params +commandMeter :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> IO Bool +commandMeter progressparser oh meter meterupdate cmd params = + commandMeter' progressparser oh meter meterupdate cmd params id + +commandMeter' :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> (CreateProcess -> CreateProcess) -> IO Bool +commandMeter' progressparser oh meter meterupdate cmd params mkprocess = do + ret <- commandMeterExitCode' progressparser oh meter meterupdate cmd params mkprocess return $ case ret of Just ExitSuccess -> True _ -> False -commandMeter' :: ProgressParser -> OutputHandler -> MeterUpdate -> FilePath -> [CommandParam] -> IO (Maybe ExitCode) -commandMeter' progressparser oh meterupdate cmd params = - outputFilter cmd params Nothing - (feedprogress zeroBytesProcessed []) +commandMeterExitCode :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> IO (Maybe ExitCode) +commandMeterExitCode progressparser oh meter meterupdate cmd params = + commandMeterExitCode' progressparser oh meter meterupdate cmd params id + +commandMeterExitCode' :: ProgressParser -> OutputHandler -> Maybe Meter -> MeterUpdate -> FilePath -> [CommandParam] -> (CreateProcess -> CreateProcess) -> IO (Maybe ExitCode) +commandMeterExitCode' progressparser oh mmeter meterupdate cmd params mkprocess = + outputFilter cmd params mkprocess Nothing + (const $ feedprogress mmeter zeroBytesProcessed []) handlestderr where - feedprogress prev buf h = do + feedprogress sendtotalsize prev buf h = do b <- S.hGetSome h 80 if S.null b then return () @@ -259,17 +284,24 @@ commandMeter' progressparser oh meterupdate cmd params = S.hPut stdout b hFlush stdout let s = decodeBS b - let (mbytes, buf') = progressparser (buf++s) + let (mbytes, mtotalsize, buf') = progressparser (buf++s) + sendtotalsize' <- case (sendtotalsize, mtotalsize) of + (Just meter, Just t) -> do + setMeterTotalSize meter t + return Nothing + _ -> return sendtotalsize case mbytes of - Nothing -> feedprogress prev buf' h + Nothing -> feedprogress sendtotalsize' prev buf' h (Just bytes) -> do when (bytes /= prev) $ meterupdate bytes - feedprogress bytes buf' h + feedprogress sendtotalsize' bytes buf' h - handlestderr h = unlessM (hIsEOF h) $ do - stderrHandler oh =<< hGetLine h - handlestderr h + handlestderr ph h = hGetLineUntilExitOrEOF ph h >>= \case + Just l -> do + stderrHandler oh l + handlestderr ph h + Nothing -> return () {- Runs a command, that may display one or more progress meters on - either stdout or stderr, and prevents the meters from being displayed. @@ -281,46 +313,54 @@ demeterCommand oh cmd params = demeterCommandEnv oh cmd params Nothing demeterCommandEnv :: OutputHandler -> FilePath -> [CommandParam] -> Maybe [(String, String)] -> IO Bool demeterCommandEnv oh cmd params environ = do - ret <- outputFilter cmd params environ - (\outh -> avoidProgress True outh stdouthandler) - (\errh -> avoidProgress True errh $ stderrHandler oh) + ret <- outputFilter cmd params id environ + (\ph outh -> avoidProgress True ph outh stdouthandler) + (\ph errh -> avoidProgress True ph errh $ stderrHandler oh) return $ case ret of Just ExitSuccess -> True _ -> False where stdouthandler l = unless (quietMode oh) $ - putStrLn l + putStrLn (safeOutput l) {- To suppress progress output, while displaying other messages, - filter out lines that contain \r (typically used to reset to the - beginning of the line when updating a progress display). -} -avoidProgress :: Bool -> Handle -> (String -> IO ()) -> IO () -avoidProgress doavoid h emitter = unlessM (hIsEOF h) $ do - s <- hGetLine h - unless (doavoid && '\r' `elem` s) $ - emitter s - avoidProgress doavoid h emitter +avoidProgress :: Bool -> ProcessHandle -> Handle -> (String -> IO ()) -> IO () +avoidProgress doavoid ph h emitter = hGetLineUntilExitOrEOF ph h >>= \case + Just s -> do + unless (doavoid && '\r' `elem` s) $ + emitter s + avoidProgress doavoid ph h emitter + Nothing -> return () outputFilter :: FilePath -> [CommandParam] + -> (CreateProcess -> CreateProcess) -> Maybe [(String, String)] - -> (Handle -> IO ()) - -> (Handle -> IO ()) + -> (ProcessHandle -> Handle -> IO ()) + -> (ProcessHandle -> Handle -> IO ()) -> IO (Maybe ExitCode) -outputFilter cmd params environ outfilter errfilter = catchMaybeIO $ do - (_, Just outh, Just errh, pid) <- createProcess p - { std_out = CreatePipe +outputFilter cmd params mkprocess environ outfilter errfilter = + catchMaybeIO $ withCreateProcess p go + where + go _ (Just outh) (Just errh) ph = do + outt <- async $ tryIO (outfilter ph outh) >> hClose outh + errt <- async $ tryIO (errfilter ph errh) >> hClose errh + ret <- waitForProcess ph + wait outt + wait errt + return ret + go _ _ _ _ = error "internal" + + p = mkprocess (proc cmd (toCommand params)) + { env = environ + , std_out = CreatePipe , std_err = CreatePipe } - void $ async $ tryIO (outfilter outh) >> hClose outh - void $ async $ tryIO (errfilter errh) >> hClose errh - waitForProcess pid - where - p = (proc cmd (toCommand params)) - { env = environ } -- | Limit a meter to only update once per unit of time. -- @@ -333,7 +373,7 @@ rateLimitMeterUpdate delta (Meter totalsizev _ _ _) meterupdate = do return $ mu lastupdate where mu lastupdate n@(BytesProcessed i) = readMVar totalsizev >>= \case - Just t | i >= t -> meterupdate n + Just (TotalSize t) | i >= t -> meterupdate n _ -> do now <- getPOSIXTime prev <- takeMVar lastupdate @@ -343,46 +383,95 @@ rateLimitMeterUpdate delta (Meter totalsizev _ _ _) meterupdate = do meterupdate n else putMVar lastupdate prev -data Meter = Meter (MVar (Maybe Integer)) (MVar MeterState) (MVar String) DisplayMeter +-- | Bandwidth limiting by inserting a delay at the point that a meter is +-- updated. +-- +-- This will only work when the actions that use bandwidth are run in the +-- same process and thread as the call to the MeterUpdate. +-- +-- For example, if the desired bandwidth is 100kb/s, and over the past +-- 1/10th of a second, 30kb was sent, then the current bandwidth is +-- 300kb/s, 3x as fast as desired. So, after getting the next chunk, +-- pause for twice as long as it took to get it. +bwLimitMeterUpdate :: ByteSize -> Duration -> MeterUpdate -> IO MeterUpdate +bwLimitMeterUpdate bwlimit duration meterupdate + | bwlimit <= 0 = return meterupdate + | otherwise = do + nowtime <- getPOSIXTime + mv <- newMVar (nowtime, Nothing) + return (mu mv) + where + mu mv n@(BytesProcessed i) = do + endtime <- getPOSIXTime + (starttime, mprevi) <- takeMVar mv + + case mprevi of + Just previ -> do + let runtime = endtime - starttime + let currbw = fromIntegral (i - previ) / runtime + let pausescale = if currbw > bwlimit' + then (currbw / bwlimit') - 1 + else 0 + unboundDelay (floor (runtime * pausescale * msecs)) + Nothing -> return () + + meterupdate n -type MeterState = (BytesProcessed, POSIXTime) + nowtime <- getPOSIXTime + putMVar mv (nowtime, Just i) -type DisplayMeter = MVar String -> Maybe Integer -> (BytesProcessed, POSIXTime) -> (BytesProcessed, POSIXTime) -> IO () + bwlimit' = fromIntegral (bwlimit * durationSeconds duration) + msecs = fromIntegral oneSecond -type RenderMeter = Maybe Integer -> (BytesProcessed, POSIXTime) -> (BytesProcessed, POSIXTime) -> String +data Meter = Meter (MVar (Maybe TotalSize)) (MVar MeterState) (MVar String) DisplayMeter + +data MeterState = MeterState + { meterBytesProcessed :: BytesProcessed + , meterTimeStamp :: POSIXTime + } deriving (Show) + +type DisplayMeter = MVar String -> Maybe TotalSize -> MeterState -> MeterState -> IO () + +type RenderMeter = Maybe TotalSize -> MeterState -> MeterState -> String -- | Make a meter. Pass the total size, if it's known. -mkMeter :: Maybe Integer -> DisplayMeter -> IO Meter -mkMeter totalsize displaymeter = Meter - <$> newMVar totalsize - <*> ((\t -> newMVar (zeroBytesProcessed, t)) =<< getPOSIXTime) - <*> newMVar "" - <*> pure displaymeter - -setMeterTotalSize :: Meter -> Integer -> IO () +mkMeter :: Maybe TotalSize -> DisplayMeter -> IO Meter +mkMeter totalsize displaymeter = do + ts <- getPOSIXTime + Meter + <$> newMVar totalsize + <*> newMVar (MeterState zeroBytesProcessed ts) + <*> newMVar "" + <*> pure displaymeter + +setMeterTotalSize :: Meter -> TotalSize -> IO () setMeterTotalSize (Meter totalsizev _ _ _) = void . swapMVar totalsizev . Just -- | Updates the meter, displaying it if necessary. updateMeter :: Meter -> MeterUpdate updateMeter (Meter totalsizev sv bv displaymeter) new = do now <- getPOSIXTime - (old, before) <- swapMVar sv (new, now) - when (old /= new) $ do + let curms = MeterState new now + oldms <- swapMVar sv curms + when (meterBytesProcessed oldms /= new) $ do totalsize <- readMVar totalsizev - displaymeter bv totalsize (old, before) (new, now) + displaymeter bv totalsize oldms curms -- | Display meter to a Handle. displayMeterHandle :: Handle -> RenderMeter -> DisplayMeter displayMeterHandle h rendermeter v msize old new = do + olds <- takeMVar v let s = rendermeter msize old new - olds <- swapMVar v s + let padding = replicate (length olds - length s) ' ' + let s' = s <> padding + putMVar v s' -- Avoid writing when the rendered meter has not changed. - when (olds /= s) $ do - let padding = replicate (length olds - length s) ' ' - hPutStr h ('\r':s ++ padding) + when (olds /= s') $ do + hPutStr h ('\r':s') hFlush h --- | Clear meter displayed by displayMeterHandle. +-- | Clear meter displayed by displayMeterHandle. May be called before +-- outputting something else, followed by more calls to displayMeterHandle. clearMeterHandle :: Meter -> Handle -> IO () clearMeterHandle (Meter _ _ v _) h = do olds <- readMVar v @@ -394,7 +483,7 @@ clearMeterHandle (Meter _ _ v _) h = do -- or when total size is not known: -- 1.3 MiB 300 KiB/s bandwidthMeter :: RenderMeter -bandwidthMeter mtotalsize (BytesProcessed old, before) (BytesProcessed new, now) = +bandwidthMeter mtotalsize (MeterState (BytesProcessed old) before) (MeterState (BytesProcessed new) now) = unwords $ catMaybes [ Just percentamount -- Pad enough for max width: "100% xxxx.xx KiB xxxx KiB/s" @@ -403,22 +492,26 @@ bandwidthMeter mtotalsize (BytesProcessed old, before) (BytesProcessed new, now) , estimatedcompletion ] where - amount = roughSize' memoryUnits True 2 new + amount = roughSize' committeeUnits True 2 new percentamount = case mtotalsize of - Just totalsize -> + Just (TotalSize totalsize) -> let p = showPercentage 0 $ percentage totalsize (min new totalsize) in p ++ replicate (6 - length p) ' ' ++ amount Nothing -> amount - rate = roughSize' memoryUnits True 0 bytespersecond ++ "/s" + rate = roughSize' committeeUnits True 0 bytespersecond ++ "/s" bytespersecond | duration == 0 = fromIntegral transferred | otherwise = floor $ fromIntegral transferred / duration transferred = max 0 (new - old) duration = max 0 (now - before) estimatedcompletion = case mtotalsize of - Just totalsize + Just (TotalSize totalsize) | bytespersecond > 0 -> Just $ fromDuration $ Duration $ (totalsize - new) `div` bytespersecond _ -> Nothing + +instance Proto.Serializable BytesProcessed where + serialize (BytesProcessed n) = show n + deserialize = BytesProcessed <$$> readish diff --git a/Utility/Misc.hs b/Utility/Misc.hs index 2f1766e..3cf5275 100644 --- a/Utility/Misc.hs +++ b/Utility/Misc.hs @@ -11,6 +11,8 @@ module Utility.Misc ( hGetContentsStrict, readFileStrict, separate, + separate', + separateEnd', firstLine, firstLine', segment, @@ -54,6 +56,20 @@ separate c l = unbreak $ break c l | null b = r | otherwise = (a, tail b) +separate' :: (Word8 -> Bool) -> S.ByteString -> (S.ByteString, S.ByteString) +separate' c l = unbreak $ S.break c l + where + unbreak r@(a, b) + | S.null b = r + | otherwise = (a, S.tail b) + +separateEnd' :: (Word8 -> Bool) -> S.ByteString -> (S.ByteString, S.ByteString) +separateEnd' c l = unbreak $ S.breakEnd c l + where + unbreak r@(a, b) + | S.null a = r + | otherwise = (S.init a, b) + {- Breaks out the first line. -} firstLine :: String -> String firstLine = takeWhile (/= '\n') @@ -78,7 +94,7 @@ prop_segment_regressionTest :: Bool prop_segment_regressionTest = all id -- Even an empty list is a segment. [ segment (== "--") [] == [[]] - -- There are two segements in this list, even though the first is empty. + -- There are two segments in this list, even though the first is empty. , segment (== "--") ["--", "foo", "bar"] == [[],["foo","bar"]] ] diff --git a/Utility/Monad.hs b/Utility/Monad.hs index abe06f3..6cd2c5e 100644 --- a/Utility/Monad.hs +++ b/Utility/Monad.hs @@ -12,6 +12,7 @@ module Utility.Monad ( getM, anyM, allM, + partitionM, untilTrue, ifM, (<||>), @@ -45,6 +46,13 @@ allM :: Monad m => (a -> m Bool) -> [a] -> m Bool allM _ [] = return True allM p (x:xs) = p x <&&> allM p xs +partitionM :: Monad m => (a -> m Bool) -> [a] -> m ([a], [a]) +partitionM _ [] = return ([], []) +partitionM p (x:xs) = do + r <- p x + (as, bs) <- partitionM p xs + return $ if r then (x:as, bs) else (as, x:bs) + {- Runs an action on values from a list until it succeeds. -} untilTrue :: Monad m => [a] -> (a -> m Bool) -> m Bool untilTrue = flip anyM diff --git a/Utility/MoveFile.hs b/Utility/MoveFile.hs new file mode 100644 index 0000000..6481b29 --- /dev/null +++ b/Utility/MoveFile.hs @@ -0,0 +1,79 @@ +{- moving files + - + - Copyright 2011-2020 Joey Hess <id@joeyh.name> + - + - License: BSD-2-clause + -} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE LambdaCase #-} +{-# OPTIONS_GHC -fno-warn-tabs #-} + +module Utility.MoveFile ( + moveFile, +) where + +import Control.Monad +import System.IO.Error +import Prelude + +#ifndef mingw32_HOST_OS +import System.PosixCompat.Files (isDirectory) +import Control.Monad.IfElse +import Utility.SafeCommand +#endif + +import Utility.SystemDirectory +import Utility.Tmp +import Utility.Exception +import Utility.Monad +import Utility.FileSystemEncoding +import qualified Utility.RawFilePath as R + +{- Moves one filename to another. + - First tries a rename, but falls back to moving across devices if needed. -} +moveFile :: RawFilePath -> RawFilePath -> IO () +moveFile src dest = tryIO (R.rename src dest) >>= onrename + where + onrename (Right _) = noop + onrename (Left e) + | isPermissionError e = rethrow + | isDoesNotExistError e = rethrow + | otherwise = viaTmp mv (fromRawFilePath dest) () + where + rethrow = throwM e + + mv tmp () = do + -- copyFile is likely not as optimised as + -- the mv command, so we'll use the command. + -- + -- But, while Windows has a "mv", it does not + -- seem very reliable, so use copyFile there. +#ifndef mingw32_HOST_OS + -- If dest is a directory, mv would move the file + -- into it, which is not desired. + whenM (isdir dest) rethrow + ok <- boolSystem "mv" + [ Param "-f" + , Param (fromRawFilePath src) + , Param tmp + ] + let e' = e +#else + r <- tryIO $ copyFile (fromRawFilePath src) tmp + let (ok, e') = case r of + Left err -> (False, err) + Right _ -> (True, e) +#endif + unless ok $ do + -- delete any partial + _ <- tryIO $ removeFile tmp + throwM e' + +#ifndef mingw32_HOST_OS + isdir f = do + r <- tryIO $ R.getSymbolicLinkStatus f + case r of + (Left _) -> return False + (Right s) -> return $ isDirectory s +#endif diff --git a/Utility/Path.hs b/Utility/Path.hs index ecc752c..64ef076 100644 --- a/Utility/Path.hs +++ b/Utility/Path.hs @@ -1,63 +1,63 @@ {- path manipulation - - - Copyright 2010-2014 Joey Hess <id@joeyh.name> + - Copyright 2010-2020 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE CPP #-} {-# OPTIONS_GHC -fno-warn-tabs #-} module Utility.Path ( simplifyPath, - absPathFrom, parentDir, upFrom, dirContains, - absPath, - relPathCwdToFile, - relPathDirToFile, - relPathDirToFileAbs, segmentPaths, + segmentPaths', runSegmentPaths, - relHome, - inPath, - searchPath, + runSegmentPaths', dotfile, - sanitizeFilePath, splitShortExtensions, - - prop_upFrom_basics, - prop_relPathDirToFile_basics, - prop_relPathDirToFile_regressionTest, + splitShortExtensions', + relPathDirToFileAbs, + inSearchPath, + searchPath, + searchPathContents, ) where -import System.FilePath +import System.FilePath.ByteString +import qualified System.FilePath as P +import qualified Data.ByteString as B import Data.List import Data.Maybe -import Data.Char +import Control.Monad import Control.Applicative import Prelude import Utility.Monad -import Utility.UserInfo -import Utility.Directory -import Utility.Split +import Utility.SystemDirectory +import Utility.Exception + +#ifdef mingw32_HOST_OS +import Data.Char import Utility.FileSystemEncoding +#endif {- Simplifies a path, removing any "." component, collapsing "dir/..", - and removing the trailing path separator. - - On Windows, preserves whichever style of path separator might be used in - - the input FilePaths. This is done because some programs in Windows + - the input RawFilePaths. This is done because some programs in Windows - demand a particular path separator -- and which one actually varies! - - This does not guarantee that two paths that refer to the same location, - and are both relative to the same location (or both absolute) will - - yeild the same result. Run both through normalise from System.FilePath + - yield the same result. Run both through normalise from System.RawFilePath - to ensure that. -} -simplifyPath :: FilePath -> FilePath +simplifyPath :: RawFilePath -> RawFilePath simplifyPath path = dropTrailingPathSeparator $ joinDrive drive $ joinPath $ norm [] $ splitPath path' where @@ -72,88 +72,143 @@ simplifyPath path = dropTrailingPathSeparator $ where p' = dropTrailingPathSeparator p -{- Makes a path absolute. - - - - The first parameter is a base directory (ie, the cwd) to use if the path - - is not already absolute, and should itsef be absolute. - - - - Does not attempt to deal with edge cases or ensure security with - - untrusted inputs. - -} -absPathFrom :: FilePath -> FilePath -> FilePath -absPathFrom dir path = simplifyPath (combine dir path) - {- takeDirectory "foo/bar/" is "foo/bar". This instead yields "foo" -} -parentDir :: FilePath -> FilePath +parentDir :: RawFilePath -> RawFilePath parentDir = takeDirectory . dropTrailingPathSeparator {- Just the parent directory of a path, or Nothing if the path has no -- parent (ie for "/" or ".") -} -upFrom :: FilePath -> Maybe FilePath +- parent (ie for "/" or "." or "foo") -} +upFrom :: RawFilePath -> Maybe RawFilePath upFrom dir | length dirs < 2 = Nothing - | otherwise = Just $ joinDrive drive $ intercalate s $ init dirs + | otherwise = Just $ joinDrive drive $ + B.intercalate (B.singleton pathSeparator) $ init dirs where -- on Unix, the drive will be "/" when the dir is absolute, -- otherwise "" (drive, path) = splitDrive dir - s = [pathSeparator] - dirs = filter (not . null) $ split s path - -prop_upFrom_basics :: FilePath -> Bool -prop_upFrom_basics dir - | null dir = True - | dir == "/" = p == Nothing - | otherwise = p /= Just dir - where - p = upFrom dir + dirs = filter (not . B.null) $ B.splitWith isPathSeparator path -{- Checks if the first FilePath is, or could be said to contain the second. +{- Checks if the first RawFilePath is, or could be said to contain the second. - For example, "foo/" contains "foo/bar". Also, "foo", "./foo", "foo/" etc - - are all equivilant. + - are all equivalent. -} -dirContains :: FilePath -> FilePath -> Bool +dirContains :: RawFilePath -> RawFilePath -> Bool dirContains a b = a == b || a' == b' - || (addTrailingPathSeparator a') `isPrefixOf` b' - || a' == "." && normalise ("." </> b') == b' + || (a'' `B.isPrefixOf` b' && avoiddotdotb) + || a' == "." && normalise ("." </> b') == b' && nodotdot b' + || dotdotcontains where a' = norm a + a'' = addTrailingPathSeparator a' b' = norm b norm = normalise . simplifyPath -{- Converts a filename into an absolute path. - - - - Unlike Directory.canonicalizePath, this does not require the path - - already exists. -} -absPath :: FilePath -> IO FilePath -absPath file = do - cwd <- getCurrentDirectory - return $ absPathFrom cwd file + {- This handles the case where a is ".." and b is "../..", + - which is not inside a. Similarly, "../.." does not contain + - "../../../". Due to the use of norm, cases like + - "../../foo/../../" get converted to eg "../../.." and + - so do not need to be handled specially here. + - + - When this is called, we already know that + - a'' is a prefix of b', so all that needs to be done is drop + - that prefix, and check if the next path component is ".." + -} + avoiddotdotb = nodotdot $ B.drop (B.length a'') b' -{- Constructs a relative path from the CWD to a file. + nodotdot p = all (not . isdotdot) (splitPath p) + + isdotdot s = dropTrailingPathSeparator s == ".." + + {- This handles the case where a is ".." or "../.." etc, + - and b is "foo" or "../foo" etc. The rule is that when + - a is entirely ".." components, b is under it when it starts + - with fewer ".." components. + - + - Due to the use of norm, cases like "../../foo/../../" get + - converted to eg "../../../" and so do not need to be handled + - specially here. + -} + dotdotcontains + | isAbsolute b' = False + | otherwise = + let aps = splitPath a' + bps = splitPath b' + in if all isdotdot aps + then length (takeWhile isdotdot bps) < length aps + else False + +{- Given an original list of paths, and an expanded list derived from it, + - which may be arbitrarily reordered, generates a list of lists, where + - each sublist corresponds to one of the original paths. + - + - When the original path is a directory, any items in the expanded list + - that are contained in that directory will appear in its segment. - - - For example, assuming CWD is /tmp/foo/bar: - - relPathCwdToFile "/tmp/foo" == ".." - - relPathCwdToFile "/tmp/foo/bar" == "" + - The order of the original list of paths is attempted to be preserved in + - the order of the returned segments. However, doing so has a O^NM + - growth factor. So, if the original list has more than 100 paths on it, + - we stop preserving ordering at that point. Presumably a user passing + - that many paths in doesn't care too much about order of the later ones. -} -relPathCwdToFile :: FilePath -> IO FilePath -relPathCwdToFile f = do - c <- getCurrentDirectory - relPathDirToFile c f +segmentPaths :: (a -> RawFilePath) -> [RawFilePath] -> [a] -> [[a]] +segmentPaths = segmentPaths' (\_ r -> r) -{- Constructs a relative path from a directory to a file. -} -relPathDirToFile :: FilePath -> FilePath -> IO FilePath -relPathDirToFile from to = relPathDirToFileAbs <$> absPath from <*> absPath to +segmentPaths' :: (Maybe RawFilePath -> a -> r) -> (a -> RawFilePath) -> [RawFilePath] -> [a] -> [[r]] +segmentPaths' f _ [] new = [map (f Nothing) new] +segmentPaths' f _ [i] new = [map (f (Just i)) new] -- optimisation +segmentPaths' f c (i:is) new = + map (f (Just i)) found : segmentPaths' f c is rest + where + (found, rest) = if length is < 100 + then partition ini new + else break (not . ini) new + ini p = i `dirContains` c p -{- This requires the first path to be absolute, and the - - second path cannot contain ../ or ./ +{- This assumes that it's cheaper to call segmentPaths on the result, + - than it would be to run the action separately with each path. In + - the case of git file list commands, that assumption tends to hold. + -} +runSegmentPaths :: (a -> RawFilePath) -> ([RawFilePath] -> IO [a]) -> [RawFilePath] -> IO [[a]] +runSegmentPaths c a paths = segmentPaths c paths <$> a paths + +runSegmentPaths' :: (Maybe RawFilePath -> a -> r) -> (a -> RawFilePath) -> ([RawFilePath] -> IO [a]) -> [RawFilePath] -> IO [[r]] +runSegmentPaths' si c a paths = segmentPaths' si c paths <$> a paths + +{- Checks if a filename is a unix dotfile. All files inside dotdirs + - count as dotfiles. -} +dotfile :: RawFilePath -> Bool +dotfile file + | f == "." = False + | f == ".." = False + | f == "" = False + | otherwise = "." `B.isPrefixOf` f || dotfile (takeDirectory file) + where + f = takeFileName file + +{- Similar to splitExtensions, but knows that some things in RawFilePaths + - after a dot are too long to be extensions. -} +splitShortExtensions :: RawFilePath -> (RawFilePath, [B.ByteString]) +splitShortExtensions = splitShortExtensions' 5 -- enough for ".jpeg" +splitShortExtensions' :: Int -> RawFilePath -> (RawFilePath, [B.ByteString]) +splitShortExtensions' maxextension = go [] + where + go c f + | len > 0 && len <= maxextension && not (B.null base) = + go (ext:c) base + | otherwise = (f, c) + where + (base, ext) = splitExtension f + len = B.length ext + +{- This requires both paths to be absolute and normalized. - - On Windows, if the paths are on different drives, - a relative path is not possible and the path is simply - returned as-is. -} -relPathDirToFileAbs :: FilePath -> FilePath -> FilePath +relPathDirToFileAbs :: RawFilePath -> RawFilePath -> RawFilePath relPathDirToFileAbs from to #ifdef mingw32_HOST_OS | normdrive from /= normdrive to = to @@ -169,72 +224,21 @@ relPathDirToFileAbs from to dotdots = replicate (length pfrom - numcommon) ".." numcommon = length common #ifdef mingw32_HOST_OS - normdrive = map toLower . takeWhile (/= ':') . takeDrive + normdrive = map toLower + -- Get just the drive letter, removing any leading + -- path separator, which takeDrive leaves on the drive + -- letter. + . dropWhileEnd (isPathSeparator . fromIntegral . ord) + . fromRawFilePath + . takeDrive #endif -prop_relPathDirToFile_basics :: FilePath -> FilePath -> Bool -prop_relPathDirToFile_basics from to - | null from || null to = True - | from == to = null r - | otherwise = not (null r) - where - r = relPathDirToFileAbs from to - -prop_relPathDirToFile_regressionTest :: Bool -prop_relPathDirToFile_regressionTest = same_dir_shortcurcuits_at_difference - where - {- Two paths have the same directory component at the same - - location, but it's not really the same directory. - - Code used to get this wrong. -} - same_dir_shortcurcuits_at_difference = - relPathDirToFileAbs (joinPath [pathSeparator : "tmp", "r", "lll", "xxx", "yyy", "18"]) - (joinPath [pathSeparator : "tmp", "r", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"]) - == joinPath ["..", "..", "..", "..", ".git", "annex", "objects", "18", "gk", "SHA256-foo", "SHA256-foo"] - -{- Given an original list of paths, and an expanded list derived from it, - - which may be arbitrarily reordered, generates a list of lists, where - - each sublist corresponds to one of the original paths. - - - - When the original path is a directory, any items in the expanded list - - that are contained in that directory will appear in its segment. - - - - The order of the original list of paths is attempted to be preserved in - - the order of the returned segments. However, doing so has a O^NM - - growth factor. So, if the original list has more than 100 paths on it, - - we stop preserving ordering at that point. Presumably a user passing - - that many paths in doesn't care too much about order of the later ones. - -} -segmentPaths :: [RawFilePath] -> [RawFilePath] -> [[RawFilePath]] -segmentPaths [] new = [new] -segmentPaths [_] new = [new] -- optimisation -segmentPaths (l:ls) new = found : segmentPaths ls rest - where - (found, rest) = if length ls < 100 - then partition inl new - else break (not . inl) new - inl f = fromRawFilePath l `dirContains` fromRawFilePath f - -{- This assumes that it's cheaper to call segmentPaths on the result, - - than it would be to run the action separately with each path. In - - the case of git file list commands, that assumption tends to hold. - -} -runSegmentPaths :: ([RawFilePath] -> IO [RawFilePath]) -> [RawFilePath] -> IO [[RawFilePath]] -runSegmentPaths a paths = segmentPaths paths <$> a paths - -{- Converts paths in the home directory to use ~/ -} -relHome :: FilePath -> IO String -relHome path = do - home <- myHomeDir - return $ if dirContains home path - then "~/" ++ relPathDirToFileAbs home path - else path - {- Checks if a command is available in PATH. - - The command may be fully-qualified, in which case, this succeeds as - long as it exists. -} -inPath :: String -> IO Bool -inPath command = isJust <$> searchPath command +inSearchPath :: String -> IO Bool +inSearchPath command = isJust <$> searchPath command {- Finds a command in PATH and returns the full path to it. - @@ -245,10 +249,10 @@ inPath command = isJust <$> searchPath command -} searchPath :: String -> IO (Maybe FilePath) searchPath command - | isAbsolute command = check command - | otherwise = getSearchPath >>= getM indir + | P.isAbsolute command = check command + | otherwise = P.getSearchPath >>= getM indir where - indir d = check $ d </> command + indir d = check $ d P.</> command check f = firstM doesFileExist #ifdef mingw32_HOST_OS [f, f ++ ".exe"] @@ -256,44 +260,16 @@ searchPath command [f] #endif -{- Checks if a filename is a unix dotfile. All files inside dotdirs - - count as dotfiles. -} -dotfile :: FilePath -> Bool -dotfile file - | f == "." = False - | f == ".." = False - | f == "" = False - | otherwise = "." `isPrefixOf` f || dotfile (takeDirectory file) - where - f = takeFileName file - -{- Given a string that we'd like to use as the basis for FilePath, but that - - was provided by a third party and is not to be trusted, returns the closest - - sane FilePath. +{- Finds commands in PATH that match a predicate. Note that the predicate + - matches on the basename of the command, but the full path to it is + - returned. - - - All spaces and punctuation and other wacky stuff are replaced - - with '_', except for '.' - - "../" will thus turn into ".._", which is safe. + - Note that this will find commands in PATH that are not executable. -} -sanitizeFilePath :: String -> FilePath -sanitizeFilePath = map sanitize - where - sanitize c - | c == '.' = c - | isSpace c || isPunctuation c || isSymbol c || isControl c || c == '/' = '_' - | otherwise = c - -{- Similar to splitExtensions, but knows that some things in FilePaths - - after a dot are too long to be extensions. -} -splitShortExtensions :: FilePath -> (FilePath, [String]) -splitShortExtensions = splitShortExtensions' 5 -- enough for ".jpeg" -splitShortExtensions' :: Int -> FilePath -> (FilePath, [String]) -splitShortExtensions' maxextension = go [] +searchPathContents :: (FilePath -> Bool) -> IO [FilePath] +searchPathContents p = + filterM doesFileExist + =<< (concat <$> (P.getSearchPath >>= mapM go)) where - go c f - | len > 0 && len <= maxextension && not (null base) = - go (ext:c) base - | otherwise = (f, c) - where - (base, ext) = splitExtension f - len = length ext + go d = map (d P.</>) . filter p + <$> catchDefaultIO [] (getDirectoryContents d) diff --git a/Utility/Path/AbsRel.hs b/Utility/Path/AbsRel.hs new file mode 100644 index 0000000..4007fbb --- /dev/null +++ b/Utility/Path/AbsRel.hs @@ -0,0 +1,99 @@ +{- absolute and relative path manipulation + - + - Copyright 2010-2021 Joey Hess <id@joeyh.name> + - + - License: BSD-2-clause + -} + +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE CPP #-} +{-# OPTIONS_GHC -fno-warn-tabs #-} + +module Utility.Path.AbsRel ( + absPathFrom, + absPath, + relPathCwdToFile, + relPathDirToFile, + relPathDirToFileAbs, + relHome, +) where + +import System.FilePath.ByteString +import qualified Data.ByteString as B +#ifdef mingw32_HOST_OS +import System.Directory (getCurrentDirectory) +#else +import System.Posix.Directory.ByteString (getWorkingDirectory) +#endif +import Control.Applicative +import Prelude + +import Utility.Path +import Utility.UserInfo +import Utility.FileSystemEncoding + +{- Makes a path absolute. + - + - Also simplifies it using simplifyPath. + - + - The first parameter is a base directory (ie, the cwd) to use if the path + - is not already absolute, and should itself be absolute. + - + - Does not attempt to deal with edge cases or ensure security with + - untrusted inputs. + -} +absPathFrom :: RawFilePath -> RawFilePath -> RawFilePath +absPathFrom dir path = simplifyPath (combine dir path) + +{- Converts a filename into an absolute path. + - + - Also simplifies it using simplifyPath. + - + - Unlike Directory.canonicalizePath, this does not require the path + - already exists. -} +absPath :: RawFilePath -> IO RawFilePath +absPath file + -- Avoid unncessarily getting the current directory when the path + -- is already absolute. absPathFrom uses simplifyPath + -- so also used here for consistency. + | isAbsolute file = return $ simplifyPath file + | otherwise = do +#ifdef mingw32_HOST_OS + cwd <- toRawFilePath <$> getCurrentDirectory +#else + cwd <- getWorkingDirectory +#endif + return $ absPathFrom cwd file + +{- Constructs the minimal relative path from the CWD to a file. + - + - For example, assuming CWD is /tmp/foo/bar: + - relPathCwdToFile "/tmp/foo" == ".." + - relPathCwdToFile "/tmp/foo/bar" == "" + - relPathCwdToFile "../bar/baz" == "baz" + -} +relPathCwdToFile :: RawFilePath -> IO RawFilePath +relPathCwdToFile f + -- Optimisation: Avoid doing any IO when the path is relative + -- and does not contain any ".." component. + | isRelative f && not (".." `B.isInfixOf` f) = return f + | otherwise = do +#ifdef mingw32_HOST_OS + c <- toRawFilePath <$> getCurrentDirectory +#else + c <- getWorkingDirectory +#endif + relPathDirToFile c f + +{- Constructs a minimal relative path from a directory to a file. -} +relPathDirToFile :: RawFilePath -> RawFilePath -> IO RawFilePath +relPathDirToFile from to = relPathDirToFileAbs <$> absPath from <*> absPath to + +{- Converts paths in the home directory to use ~/ -} +relHome :: FilePath -> IO String +relHome path = do + let path' = toRawFilePath path + home <- toRawFilePath <$> myHomeDir + return $ if dirContains home path' + then fromRawFilePath ("~/" <> relPathDirToFileAbs home path') + else path diff --git a/Utility/Process.hs b/Utility/Process.hs index af3a5f4..07f035d 100644 --- a/Utility/Process.hs +++ b/Utility/Process.hs @@ -1,17 +1,17 @@ {- System.Process enhancements, including additional ways of running - processes, and logging. - - - Copyright 2012-2015 Joey Hess <id@joeyh.name> + - Copyright 2012-2020 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} -{-# LANGUAGE CPP, Rank2Types #-} +{-# LANGUAGE CPP, Rank2Types, LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_GHC -fno-warn-tabs #-} module Utility.Process ( module X, - CreateProcess(..), StdHandle(..), readProcess, readProcess', @@ -20,41 +20,33 @@ module Utility.Process ( forceSuccessProcess, forceSuccessProcess', checkSuccessProcess, - ignoreFailureProcess, - createProcessSuccess, - createProcessChecked, - createBackgroundProcess, - withHandle, - withIOHandles, - withOEHandles, withNullHandle, - withQuietOutput, - feedWithQuietOutput, createProcess, + withCreateProcess, waitForProcess, + cleanupProcess, + hGetLineUntilExitOrEOF, startInteractiveProcess, stdinHandle, stdoutHandle, stderrHandle, - ioHandles, processHandle, + showCmd, devNull, ) where import qualified Utility.Process.Shim -import qualified Utility.Process.Shim as X hiding (CreateProcess(..), createProcess, runInteractiveProcess, readProcess, readProcessWithExitCode, system, rawSystem, runInteractiveCommand, runProcess) -import Utility.Process.Shim hiding (createProcess, readProcess, waitForProcess) +import Utility.Process.Shim as X (CreateProcess(..), ProcessHandle, StdStream(..), CmdSpec(..), proc, getPid, getProcessExitCode, shell, terminateProcess, interruptProcessGroupOf) import Utility.Misc import Utility.Exception +import Utility.Monad +import Utility.Debug import System.Exit import System.IO -import System.Log.Logger -import Control.Concurrent -import qualified Control.Exception as E -import Control.Monad - -type CreateProcessRunner = forall a. CreateProcess -> ((Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) -> IO a) -> IO a +import Control.Monad.IO.Class +import Control.Concurrent.Async +import qualified Data.ByteString as S data StdHandle = StdinHandle | StdoutHandle | StderrHandle deriving (Eq) @@ -62,21 +54,22 @@ data StdHandle = StdinHandle | StdoutHandle | StderrHandle -- | Normally, when reading from a process, it does not need to be fed any -- standard input. readProcess :: FilePath -> [String] -> IO String -readProcess cmd args = readProcessEnv cmd args Nothing +readProcess cmd args = readProcess' (proc cmd args) readProcessEnv :: FilePath -> [String] -> Maybe [(String, String)] -> IO String -readProcessEnv cmd args environ = readProcess' p - where - p = (proc cmd args) - { std_out = CreatePipe - , env = environ - } +readProcessEnv cmd args environ = + readProcess' $ (proc cmd args) { env = environ } readProcess' :: CreateProcess -> IO String -readProcess' p = withHandle StdoutHandle createProcessSuccess p $ \h -> do - output <- hGetContentsStrict h - hClose h - return output +readProcess' p = withCreateProcess p' go + where + p' = p { std_out = CreatePipe } + go _ (Just h) _ pid = do + output <- hGetContentsStrict h + hClose h + forceSuccessProcess p' pid + return output + go _ _ _ _ = error "internal" -- | Runs an action to write to a process on its stdin, -- returns its output, and also allows specifying the environment. @@ -85,32 +78,8 @@ writeReadProcessEnv -> [String] -> Maybe [(String, String)] -> (Maybe (Handle -> IO ())) - -> (Maybe (Handle -> IO ())) - -> IO String -writeReadProcessEnv cmd args environ writestdin adjusthandle = do - (Just inh, Just outh, _, pid) <- createProcess p - - maybe (return ()) (\a -> a inh) adjusthandle - maybe (return ()) (\a -> a outh) adjusthandle - - -- fork off a thread to start consuming the output - output <- hGetContents outh - outMVar <- newEmptyMVar - _ <- forkIO $ E.evaluate (length output) >> putMVar outMVar () - - -- now write and flush any input - maybe (return ()) (\a -> a inh >> hFlush inh) writestdin - hClose inh -- done with stdin - - -- wait on the output - takeMVar outMVar - hClose outh - - -- wait on the process - forceSuccessProcess p pid - - return output - + -> IO S.ByteString +writeReadProcessEnv cmd args environ writestdin = withCreateProcess p go where p = (proc cmd args) { std_in = CreatePipe @@ -118,6 +87,18 @@ writeReadProcessEnv cmd args environ writestdin adjusthandle = do , std_err = Inherit , env = environ } + + go (Just inh) (Just outh) _ pid = do + let reader = hClose outh `after` S.hGetContents outh + let writer = do + maybe (return ()) (\a -> a inh >> hFlush inh) writestdin + hClose inh + (output, ()) <- concurrently reader writer + + forceSuccessProcess p pid + + return output + go _ _ _ _ = error "internal" -- | Waits for a ProcessHandle, and throws an IOError if the process -- did not exit successfully. @@ -130,117 +111,15 @@ forceSuccessProcess' p (ExitFailure n) = fail $ showCmd p ++ " exited " ++ show n -- | Waits for a ProcessHandle and returns True if it exited successfully. --- Note that using this with createProcessChecked will throw away --- the Bool, and is only useful to ignore the exit code of a process, --- while still waiting for it. -} checkSuccessProcess :: ProcessHandle -> IO Bool checkSuccessProcess pid = do code <- waitForProcess pid return $ code == ExitSuccess -ignoreFailureProcess :: ProcessHandle -> IO Bool -ignoreFailureProcess pid = do - void $ waitForProcess pid - return True - --- | Runs createProcess, then an action on its handles, and then --- forceSuccessProcess. -createProcessSuccess :: CreateProcessRunner -createProcessSuccess p a = createProcessChecked (forceSuccessProcess p) p a - --- | Runs createProcess, then an action on its handles, and then --- a checker action on its exit code, which must wait for the process. -createProcessChecked :: (ProcessHandle -> IO b) -> CreateProcessRunner -createProcessChecked checker p a = do - t@(_, _, _, pid) <- createProcess p - r <- tryNonAsync $ a t - _ <- checker pid - either E.throw return r - --- | Leaves the process running, suitable for lazy streaming. --- Note: Zombies will result, and must be waited on. -createBackgroundProcess :: CreateProcessRunner -createBackgroundProcess p a = a =<< createProcess p - --- | Runs a CreateProcessRunner, on a CreateProcess structure, that --- is adjusted to pipe only from/to a single StdHandle, and passes --- the resulting Handle to an action. -withHandle - :: StdHandle - -> CreateProcessRunner - -> CreateProcess - -> (Handle -> IO a) - -> IO a -withHandle h creator p a = creator p' $ a . select - where - base = p - { std_in = Inherit - , std_out = Inherit - , std_err = Inherit - } - (select, p') = case h of - StdinHandle -> (stdinHandle, base { std_in = CreatePipe }) - StdoutHandle -> (stdoutHandle, base { std_out = CreatePipe }) - StderrHandle -> (stderrHandle, base { std_err = CreatePipe }) - --- | Like withHandle, but passes (stdin, stdout) handles to the action. -withIOHandles - :: CreateProcessRunner - -> CreateProcess - -> ((Handle, Handle) -> IO a) - -> IO a -withIOHandles creator p a = creator p' $ a . ioHandles - where - p' = p - { std_in = CreatePipe - , std_out = CreatePipe - , std_err = Inherit - } - --- | Like withHandle, but passes (stdout, stderr) handles to the action. -withOEHandles - :: CreateProcessRunner - -> CreateProcess - -> ((Handle, Handle) -> IO a) - -> IO a -withOEHandles creator p a = creator p' $ a . oeHandles - where - p' = p - { std_in = Inherit - , std_out = CreatePipe - , std_err = CreatePipe - } - -withNullHandle :: (Handle -> IO a) -> IO a -withNullHandle = withFile devNull WriteMode - --- | Forces the CreateProcessRunner to run quietly; --- both stdout and stderr are discarded. -withQuietOutput - :: CreateProcessRunner - -> CreateProcess - -> IO () -withQuietOutput creator p = withNullHandle $ \nullh -> do - let p' = p - { std_out = UseHandle nullh - , std_err = UseHandle nullh - } - creator p' $ const $ return () - --- | Stdout and stderr are discarded, while the process is fed stdin --- from the handle. -feedWithQuietOutput - :: CreateProcessRunner - -> CreateProcess - -> (Handle -> IO a) - -> IO a -feedWithQuietOutput creator p a = withFile devNull WriteMode $ \nullh -> do - let p' = p - { std_in = CreatePipe - , std_out = UseHandle nullh - , std_err = UseHandle nullh - } - creator p' $ a . stdinHandle +withNullHandle :: (MonadIO m, MonadMask m) => (Handle -> m a) -> m a +withNullHandle = bracket + (liftIO $ openFile devNull WriteMode) + (liftIO . hClose) devNull :: FilePath #ifndef mingw32_HOST_OS @@ -256,6 +135,7 @@ devNull = "\\\\.\\NUL" -- Get it wrong and the runtime crash will always happen, so should be -- easily noticed. type HandleExtractor = (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) -> Handle + stdinHandle :: HandleExtractor stdinHandle (Just h, _, _, _) = h stdinHandle _ = error "expected stdinHandle" @@ -265,12 +145,6 @@ stdoutHandle _ = error "expected stdoutHandle" stderrHandle :: HandleExtractor stderrHandle (_, _, Just h, _) = h stderrHandle _ = error "expected stderrHandle" -ioHandles :: (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) -> (Handle, Handle) -ioHandles (Just hin, Just hout, _, _) = (hin, hout) -ioHandles _ = error "expected ioHandles" -oeHandles :: (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) -> (Handle, Handle) -oeHandles (_, Just hout, Just herr, _) = (hout, herr) -oeHandles _ = error "expected oeHandles" processHandle :: (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) -> ProcessHandle processHandle (_, _, _, pid) = pid @@ -302,15 +176,26 @@ startInteractiveProcess cmd args environ = do -- | Wrapper around 'System.Process.createProcess' that does debug logging. createProcess :: CreateProcess -> IO (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) createProcess p = do - debugProcess p - Utility.Process.Shim.createProcess p + r@(_, _, _, h) <- Utility.Process.Shim.createProcess p + debugProcess p h + return r + +-- | Wrapper around 'System.Process.withCreateProcess' that does debug logging. +withCreateProcess :: CreateProcess -> (Maybe Handle -> Maybe Handle -> Maybe Handle -> ProcessHandle -> IO a) -> IO a +withCreateProcess p action = bracket (createProcess p) cleanupProcess + (\(m_in, m_out, m_err, ph) -> action m_in m_out m_err ph) -- | Debugging trace for a CreateProcess. -debugProcess :: CreateProcess -> IO () -debugProcess p = debugM "Utility.Process" $ unwords - [ action ++ ":" - , showCmd p - ] +debugProcess :: CreateProcess -> ProcessHandle -> IO () +debugProcess p h = do + pid <- getPid h + debug "Utility.Process" $ unwords $ + [ describePid pid + , action ++ ":" + , showCmd p + ] ++ case cwd p of + Nothing -> [] + Just c -> ["in", show c] where action | piped (std_in p) && piped (std_out p) = "chat" @@ -320,9 +205,121 @@ debugProcess p = debugM "Utility.Process" $ unwords piped Inherit = False piped _ = True +describePid :: Maybe Utility.Process.Shim.Pid -> String +describePid Nothing = "process" +describePid (Just p) = "process [" ++ show p ++ "]" + -- | Wrapper around 'System.Process.waitForProcess' that does debug logging. waitForProcess :: ProcessHandle -> IO ExitCode waitForProcess h = do + -- Have to get pid before waiting, which closes the ProcessHandle. + pid <- getPid h r <- Utility.Process.Shim.waitForProcess h - debugM "Utility.Process" ("process done " ++ show r) + debug "Utility.Process" (describePid pid ++ " done " ++ show r) return r + +cleanupProcess :: (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) -> IO () +#if MIN_VERSION_process(1,6,4) +cleanupProcess = Utility.Process.Shim.cleanupProcess +#else +cleanupProcess (mb_stdin, mb_stdout, mb_stderr, pid) = do + -- Unlike the real cleanupProcess, this does not wait + -- for the process to finish in the background, so if + -- the process ignores SIGTERM, this can block until the process + -- gets around the exiting. + terminateProcess pid + let void _ = return () + maybe (return ()) (void . tryNonAsync . hClose) mb_stdin + maybe (return ()) hClose mb_stdout + maybe (return ()) hClose mb_stderr + void $ waitForProcess pid +#endif + +{- | Like hGetLine, reads a line from the Handle. Returns Nothing if end of + - file is reached, or the handle is closed, or if the process has exited + - and there is nothing more buffered to read from the handle. + - + - This is useful to protect against situations where the process might + - have transferred the handle being read to another process, and so + - the handle could remain open after the process has exited. That is a rare + - situation, but can happen. Consider a the process that started up a + - daemon, and the daemon inherited stderr from it, rather than the more + - usual behavior of closing the file descriptor. Reading from stderr + - would block past the exit of the process. + - + - In that situation, this will detect when the process has exited, + - and avoid blocking forever. But will still return anything the process + - buffered to the handle before exiting. + - + - Note on newline mode: This ignores whatever newline mode is configured + - for the handle, because there is no way to query that. On Windows, + - it will remove any \r coming before the \n. On other platforms, + - it does not treat \r specially. + -} +hGetLineUntilExitOrEOF :: ProcessHandle -> Handle -> IO (Maybe String) +hGetLineUntilExitOrEOF ph h = go [] + where + go buf = do + ready <- waitforinputorerror smalldelay + if ready + then getloop buf go + else getProcessExitCode ph >>= \case + -- Process still running, wait longer. + Nothing -> go buf + -- Process is done. It's possible + -- that it output something and exited + -- since the prior hWaitForInput, + -- so check one more time for any buffered + -- output. + Just _ -> finalcheck buf + + finalcheck buf = do + ready <- waitforinputorerror 0 + if ready + then getloop buf finalcheck + -- No remaining buffered input, though the handle + -- may not be EOF if something else is keeping it + -- open. Treated the same as EOF. + else eofwithnolineend buf + + -- On exception, proceed as if there was input; + -- EOF and any encoding issues are dealt with + -- when reading from the handle. + waitforinputorerror t = hWaitForInput h t + `catchNonAsync` const (pure True) + + getchar = + catcherr EOF $ + -- If the handle is closed, reading from it is + -- an IllegalOperation. + catcherr IllegalOperation $ + Just <$> hGetChar h + where + catcherr t = catchIOErrorType t (const (pure Nothing)) + + getloop buf cont = + getchar >>= \case + Just c + | c == '\n' -> return (Just (gotline buf)) + | otherwise -> cont (c:buf) + Nothing -> eofwithnolineend buf + +#ifndef mingw32_HOST_OS + gotline buf = reverse buf +#else + gotline ('\r':buf) = reverse buf + gotline buf = reverse buf +#endif + + eofwithnolineend buf = return $ + if null buf + then Nothing -- no line read + else Just (reverse buf) + + -- Tenth of a second delay. If the process exits with the FD being + -- held open, will wait up to twice this long before returning. + -- This delay could be made smaller. However, that is an unusual + -- case, and making it too small would cause lots of wakeups while + -- waiting for output. Bearing in mind that this could be run on + -- many processes at the same time. + smalldelay = 100 -- milliseconds diff --git a/Utility/Process/Transcript.hs b/Utility/Process/Transcript.hs new file mode 100644 index 0000000..7bf94ff --- /dev/null +++ b/Utility/Process/Transcript.hs @@ -0,0 +1,97 @@ +{- Process transcript + - + - Copyright 2012-2020 Joey Hess <id@joeyh.name> + - + - License: BSD-2-clause + -} + +{-# LANGUAGE CPP #-} +{-# OPTIONS_GHC -fno-warn-tabs #-} + +module Utility.Process.Transcript ( + processTranscript, + processTranscript', + processTranscript'', +) where + +import Utility.Process + +import System.IO +import System.Exit +import Control.Concurrent.Async +import Control.Monad +#ifndef mingw32_HOST_OS +import Control.Exception +import qualified System.Posix.IO +#else +import Control.Applicative +#endif +import Data.Maybe +import Prelude + +-- | Runs a process and returns a transcript combining its stdout and +-- stderr, and whether it succeeded or failed. +processTranscript :: String -> [String] -> (Maybe String) -> IO (String, Bool) +processTranscript cmd opts = processTranscript' (proc cmd opts) + +-- | Also feeds the process some input. +processTranscript' :: CreateProcess -> Maybe String -> IO (String, Bool) +processTranscript' cp input = do + (t, c) <- processTranscript'' cp input + return (t, c == ExitSuccess) + +processTranscript'' :: CreateProcess -> Maybe String -> IO (String, ExitCode) +processTranscript'' cp input = do +#ifndef mingw32_HOST_OS +{- This implementation interleves stdout and stderr in exactly the order + - the process writes them. -} + let setup = do + (readf, writef) <- System.Posix.IO.createPipe + System.Posix.IO.setFdOption readf System.Posix.IO.CloseOnExec True + System.Posix.IO.setFdOption writef System.Posix.IO.CloseOnExec True + readh <- System.Posix.IO.fdToHandle readf + writeh <- System.Posix.IO.fdToHandle writef + return (readh, writeh) + let cleanup (readh, writeh) = do + hClose readh + hClose writeh + bracket setup cleanup $ \(readh, writeh) -> do + let cp' = cp + { std_in = if isJust input then CreatePipe else Inherit + , std_out = UseHandle writeh + , std_err = UseHandle writeh + } + withCreateProcess cp' $ \hin hout herr pid -> do + get <- asyncreader pid readh + writeinput input (hin, hout, herr, pid) + code <- waitForProcess pid + transcript <- wait get + return (transcript, code) +#else +{- This implementation for Windows puts stderr after stdout. -} + let cp' = cp + { std_in = if isJust input then CreatePipe else Inherit + , std_out = CreatePipe + , std_err = CreatePipe + } + withCreateProcess cp' $ \hin hout herr pid -> do + let p = (hin, hout, herr, pid) + getout <- asyncreader pid (stdoutHandle p) + geterr <- asyncreader pid (stderrHandle p) + writeinput input p + code <- waitForProcess pid + transcript <- (++) <$> wait getout <*> wait geterr + return (transcript, code) +#endif + where + asyncreader pid h = async $ reader pid h [] + reader pid h c = hGetLineUntilExitOrEOF pid h >>= \case + Nothing -> return (unlines (reverse c)) + Just l -> reader pid h (l:c) + writeinput (Just s) p = do + let inh = stdinHandle p + unless (null s) $ do + hPutStr inh s + hFlush inh + hClose inh + writeinput Nothing _ = return () diff --git a/Utility/QuickCheck.hs b/Utility/QuickCheck.hs index b0a39f3..96e31d5 100644 --- a/Utility/QuickCheck.hs +++ b/Utility/QuickCheck.hs @@ -1,25 +1,62 @@ {- QuickCheck with additional instances - - - Copyright 2012-2014 Joey Hess <id@joeyh.name> + - Copyright 2012-2020 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} {-# OPTIONS_GHC -fno-warn-orphans #-} +{-# OPTIONS_GHC -fno-warn-tabs #-} {-# LANGUAGE TypeSynonymInstances #-} module Utility.QuickCheck ( module X - , module Utility.QuickCheck + , TestableString + , fromTestableString + , TestableFilePath(..) + , nonNegative + , positive ) where import Test.QuickCheck as X import Data.Time.Clock.POSIX import Data.Ratio +import Data.Char import System.Posix.Types import Data.List.NonEmpty (NonEmpty(..)) import Prelude +{- A String, but Arbitrary is limited to ascii. + - + - When in a non-utf8 locale, String does not normally contain any non-ascii + - characters, except for ones in surrogate plane. Converting a string that + - does contain other unicode characters to a ByteString using the + - filesystem encoding (see GHC.IO.Encoding) will throw an exception, + - so use this instead to avoid quickcheck tests breaking unncessarily. + -} +newtype TestableString = TestableString + { fromTestableString :: String } + deriving (Show) + +instance Arbitrary TestableString where + arbitrary = TestableString . filter isAscii <$> arbitrary + +{- FilePath constrained to not be the empty string, not contain a NUL, + - and contain only ascii. + - + - No real-world filename can be empty or contain a NUL. So code can + - well be written that assumes that and using this avoids quickcheck + - tests breaking unncessarily. + -} +newtype TestableFilePath = TestableFilePath + { fromTestableFilePath :: FilePath } + deriving (Show) + +instance Arbitrary TestableFilePath where + arbitrary = (TestableFilePath . fromTestableString <$> arbitrary) + `suchThat` (not . null . fromTestableFilePath) + `suchThat` (not . any (== '\NUL') . fromTestableFilePath) + {- Times before the epoch are excluded. Half with decimal and half without. -} instance Arbitrary POSIXTime where arbitrary = do diff --git a/Utility/RawFilePath.hs b/Utility/RawFilePath.hs new file mode 100644 index 0000000..b39423d --- /dev/null +++ b/Utility/RawFilePath.hs @@ -0,0 +1,125 @@ +{- Portability shim for basic operations on RawFilePaths. + - + - On unix, this makes syscalls using RawFilesPaths as efficiently as + - possible. + - + - On Windows, filenames are in unicode, so RawFilePaths have to be + - decoded. So this library will work, but less efficiently than using + - FilePath would. However, this library also takes care to support long + - filenames on Windows, by either using other libraries that do, or by + - doing UNC-style conversion itself. + - + - Copyright 2019-2023 Joey Hess <id@joeyh.name> + - + - License: BSD-2-clause + -} + +{-# LANGUAGE CPP #-} +{-# OPTIONS_GHC -fno-warn-tabs #-} + +module Utility.RawFilePath ( + RawFilePath, + readSymbolicLink, + createSymbolicLink, + createLink, + removeLink, + getFileStatus, + getSymbolicLinkStatus, + doesPathExist, + getCurrentDirectory, + createDirectory, + setFileMode, + setOwnerAndGroup, + rename, + createNamedPipe, + fileAccess, +) where + +#ifndef mingw32_HOST_OS +import Utility.FileSystemEncoding (RawFilePath) +import System.Posix.Files.ByteString +import qualified System.Posix.Directory.ByteString as D + +-- | Checks if a file or directory exists. Note that a dangling symlink +-- will be false. +doesPathExist :: RawFilePath -> IO Bool +doesPathExist = fileExist + +getCurrentDirectory :: IO RawFilePath +getCurrentDirectory = D.getWorkingDirectory + +createDirectory :: RawFilePath -> IO () +createDirectory p = D.createDirectory p 0o777 + +#else +import System.PosixCompat (FileStatus, FileMode) +-- System.PosixCompat does not handle UNC-style conversion itself, +-- so all uses of it library have to be pre-converted below. See +-- https://github.com/jacobstanley/unix-compat/issues/56 +import qualified System.PosixCompat as P +import qualified System.Directory as D +import Utility.FileSystemEncoding +import Utility.Path.Windows + +readSymbolicLink :: RawFilePath -> IO RawFilePath +readSymbolicLink f = toRawFilePath <$> P.readSymbolicLink (fromRawFilePath f) + +createSymbolicLink :: RawFilePath -> RawFilePath -> IO () +createSymbolicLink a b = do + a' <- fromRawFilePath <$> convertToWindowsNativeNamespace a + b' <- fromRawFilePath <$> convertToWindowsNativeNamespace b + P.createSymbolicLink a' b' + +createLink :: RawFilePath -> RawFilePath -> IO () +createLink a b = do + a' <- fromRawFilePath <$> convertToWindowsNativeNamespace a + b' <- fromRawFilePath <$> convertToWindowsNativeNamespace b + P.createLink a' b' + +{- On windows, removeLink is not available, so only remove files, + - not symbolic links. -} +removeLink :: RawFilePath -> IO () +removeLink = D.removeFile . fromRawFilePath + +getFileStatus :: RawFilePath -> IO FileStatus +getFileStatus p = P.getFileStatus . fromRawFilePath + =<< convertToWindowsNativeNamespace p + +getSymbolicLinkStatus :: RawFilePath -> IO FileStatus +getSymbolicLinkStatus p = P.getSymbolicLinkStatus . fromRawFilePath + =<< convertToWindowsNativeNamespace p + +doesPathExist :: RawFilePath -> IO Bool +doesPathExist = D.doesPathExist . fromRawFilePath + +getCurrentDirectory :: IO RawFilePath +getCurrentDirectory = toRawFilePath <$> D.getCurrentDirectory + +createDirectory :: RawFilePath -> IO () +createDirectory = D.createDirectory . fromRawFilePath + +setFileMode :: RawFilePath -> FileMode -> IO () +setFileMode p m = do + p' <- fromRawFilePath <$> convertToWindowsNativeNamespace p + P.setFileMode p' m + +{- Using renamePath rather than the rename provided in unix-compat + - because of this bug https://github.com/jacobstanley/unix-compat/issues/56-} +rename :: RawFilePath -> RawFilePath -> IO () +rename a b = D.renamePath (fromRawFilePath a) (fromRawFilePath b) + +setOwnerAndGroup :: RawFilePath -> P.UserID -> P.GroupID -> IO () +setOwnerAndGroup p u g = do + p' <- fromRawFilePath <$> convertToWindowsNativeNamespace p + P.setOwnerAndGroup p' u g + +createNamedPipe :: RawFilePath -> FileMode -> IO () +createNamedPipe p m = do + p' <- fromRawFilePath <$> convertToWindowsNativeNamespace p + P.createNamedPipe p' m + +fileAccess :: RawFilePath -> Bool -> Bool -> Bool -> IO Bool +fileAccess p a b c = do + p' <- fromRawFilePath <$> convertToWindowsNativeNamespace p + P.fileAccess p' a b c +#endif diff --git a/Utility/Rsync.hs b/Utility/Rsync.hs index c6881b7..e377eb9 100644 --- a/Utility/Rsync.hs +++ b/Utility/Rsync.hs @@ -114,7 +114,7 @@ rsyncUrlIsPath s -} rsyncProgress :: OutputHandler -> MeterUpdate -> [CommandParam] -> IO Bool rsyncProgress oh meter ps = - commandMeter' parseRsyncProgress oh meter "rsync" (rsyncParamsFixup ps) >>= \case + commandMeterExitCode parseRsyncProgress oh Nothing meter "rsync" (rsyncParamsFixup ps) >>= \case Just ExitSuccess -> return True Just (ExitFailure exitcode) -> do when (exitcode /= 1) $ @@ -136,10 +136,10 @@ rsyncProgress oh meter ps = parseRsyncProgress :: ProgressParser parseRsyncProgress = go [] . reverse . progresschunks where - go remainder [] = (Nothing, remainder) + go remainder [] = (Nothing, Nothing, remainder) go remainder (x:xs) = case parsebytes (findbytesstart x) of Nothing -> go (delim:x++remainder) xs - Just b -> (Just (toBytesProcessed b), remainder) + Just b -> (Just (toBytesProcessed b), Nothing, remainder) delim = '\r' diff --git a/Utility/SafeCommand.hs b/Utility/SafeCommand.hs index 19d5f20..6f9419c 100644 --- a/Utility/SafeCommand.hs +++ b/Utility/SafeCommand.hs @@ -16,18 +16,13 @@ module Utility.SafeCommand ( safeSystem, safeSystem', safeSystemEnv, - shellWrap, - shellEscape, - shellUnEscape, segmentXargsOrdered, segmentXargsUnordered, - prop_isomorphic_shellEscape, - prop_isomorphic_shellEscape_multiword, ) where -import System.Exit import Utility.Process -import Utility.Split + +import System.Exit import System.FilePath import Data.Char import Data.List @@ -61,6 +56,8 @@ toCommand' (File s) = s -- | Run a system command, and returns True or False if it succeeded or failed. -- +-- (Throws an exception if the command is not found.) +-- -- This and other command running functions in this module log the commands -- run at debug level, using System.Log.Logger. boolSystem :: FilePath -> [CommandParam] -> IO Bool @@ -81,9 +78,9 @@ safeSystem :: FilePath -> [CommandParam] -> IO ExitCode safeSystem command params = safeSystem' command params id safeSystem' :: FilePath -> [CommandParam] -> (CreateProcess -> CreateProcess) -> IO ExitCode -safeSystem' command params mkprocess = do - (_, _, _, pid) <- createProcess p - waitForProcess pid +safeSystem' command params mkprocess = + withCreateProcess p $ \_ _ _ pid -> + waitForProcess pid where p = mkprocess $ proc command (toCommand params) @@ -91,44 +88,6 @@ safeSystemEnv :: FilePath -> [CommandParam] -> Maybe [(String, String)] -> IO Ex safeSystemEnv command params environ = safeSystem' command params $ \p -> p { env = environ } --- | Wraps a shell command line inside sh -c, allowing it to be run in a --- login shell that may not support POSIX shell, eg csh. -shellWrap :: String -> String -shellWrap cmdline = "sh -c " ++ shellEscape cmdline - --- | Escapes a filename or other parameter to be safely able to be exposed to --- the shell. --- --- This method works for POSIX shells, as well as other shells like csh. -shellEscape :: String -> String -shellEscape f = "'" ++ escaped ++ "'" - where - -- replace ' with '"'"' - escaped = intercalate "'\"'\"'" $ splitc '\'' f - --- | Unescapes a set of shellEscaped words or filenames. -shellUnEscape :: String -> [String] -shellUnEscape [] = [] -shellUnEscape s = word : shellUnEscape rest - where - (word, rest) = findword "" s - findword w [] = (w, "") - findword w (c:cs) - | c == ' ' = (w, cs) - | c == '\'' = inquote c w cs - | c == '"' = inquote c w cs - | otherwise = findword (w++[c]) cs - inquote _ w [] = (w, "") - inquote q w (c:cs) - | c == q = findword w cs - | otherwise = inquote q (w++[c]) cs - --- | For quickcheck. -prop_isomorphic_shellEscape :: String -> Bool -prop_isomorphic_shellEscape s = [s] == (shellUnEscape . shellEscape) s -prop_isomorphic_shellEscape_multiword :: [String] -> Bool -prop_isomorphic_shellEscape_multiword s = s == (shellUnEscape . unwords . map shellEscape) s - -- | Segments a list of filenames into groups that are all below the maximum -- command-line length limit. segmentXargsOrdered :: [FilePath] -> [[FilePath]] diff --git a/Utility/SafeOutput.hs b/Utility/SafeOutput.hs new file mode 100644 index 0000000..d781386 --- /dev/null +++ b/Utility/SafeOutput.hs @@ -0,0 +1,36 @@ +{- Safe output to the terminal of possibly attacker-controlled strings, + - avoiding displaying control characters. + - + - Copyright 2023 Joey Hess <id@joeyh.name> + - + - License: BSD-2-clause + -} + +{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, CPP #-} +{-# OPTIONS_GHC -fno-warn-tabs #-} + +module Utility.SafeOutput ( + safeOutput, + safeOutputChar, +) where + +import Data.Char +import qualified Data.ByteString as S + +class SafeOutputtable t where + safeOutput :: t -> t + +instance SafeOutputtable String where + safeOutput = filter safeOutputChar + +instance SafeOutputtable S.ByteString where + safeOutput = S.filter (safeOutputChar . chr . fromIntegral) + +safeOutputChar :: Char -> Bool +safeOutputChar c + | not (isControl c) = True + | c == '\n' = True + | c == '\t' = True + | c == '\DEL' = False + | ord c > 31 = True + | otherwise = False diff --git a/Utility/SimpleProtocol.hs b/Utility/SimpleProtocol.hs new file mode 100644 index 0000000..acd2439 --- /dev/null +++ b/Utility/SimpleProtocol.hs @@ -0,0 +1,151 @@ +{- Simple line-based protocols. + - + - Copyright 2013-2020 Joey Hess <id@joeyh.name> + - + - License: BSD-2-clause + -} + +{-# LANGUAGE FlexibleInstances #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Utility.SimpleProtocol ( + Sendable(..), + Receivable(..), + parseMessage, + Serializable(..), + Parser, + parseFail, + parse0, + parse1, + parse2, + parse3, + parse4, + parse5, + dupIoHandles, + getProtocolLine, +) where + +import Data.Char +import GHC.IO.Handle +import Text.Read + +import Common + +-- Messages that can be sent. +class Sendable m where + formatMessage :: m -> [String] + +-- Messages that can be received. +class Receivable m where + -- Passed the first word of the message, returns + -- a Parser that can be be fed the rest of the message to generate + -- the value. + parseCommand :: String -> Parser m + +parseMessage :: (Receivable m) => String -> Maybe m +parseMessage s = parseCommand command rest + where + (command, rest) = splitWord s + +class Serializable a where + serialize :: a -> String + deserialize :: String -> Maybe a + +instance Serializable [Char] where + serialize = id + deserialize = Just + +instance Serializable Integer where + serialize = show + deserialize = readMaybe + +instance Serializable ExitCode where + serialize ExitSuccess = "0" + serialize (ExitFailure n) = show n + deserialize "0" = Just ExitSuccess + deserialize s = ExitFailure <$> readMaybe s + +{- Parsing the parameters of messages. Using the right parseN ensures + - that the string is split into exactly the requested number of words, + - which allows the last parameter of a message to contain arbitrary + - whitespace, etc, without needing any special quoting. + -} +type Parser a = String -> Maybe a + +parseFail :: Parser a +parseFail _ = Nothing + +parse0 :: a -> Parser a +parse0 mk "" = Just mk +parse0 _ _ = Nothing + +parse1 :: Serializable p1 => (p1 -> a) -> Parser a +parse1 mk p1 = mk <$> deserialize p1 + +parse2 :: (Serializable p1, Serializable p2) => (p1 -> p2 -> a) -> Parser a +parse2 mk s = mk <$> deserialize p1 <*> deserialize p2 + where + (p1, p2) = splitWord s + +parse3 :: (Serializable p1, Serializable p2, Serializable p3) => (p1 -> p2 -> p3 -> a) -> Parser a +parse3 mk s = mk <$> deserialize p1 <*> deserialize p2 <*> deserialize p3 + where + (p1, rest) = splitWord s + (p2, p3) = splitWord rest + +parse4 :: (Serializable p1, Serializable p2, Serializable p3, Serializable p4) => (p1 -> p2 -> p3 -> p4 -> a) -> Parser a +parse4 mk s = mk <$> deserialize p1 <*> deserialize p2 <*> deserialize p3 <*> deserialize p4 + where + (p1, rest) = splitWord s + (p2, rest') = splitWord rest + (p3, p4) = splitWord rest' + +parse5 :: (Serializable p1, Serializable p2, Serializable p3, Serializable p4, Serializable p5) => (p1 -> p2 -> p3 -> p4 -> p5 -> a) -> Parser a +parse5 mk s = mk <$> deserialize p1 <*> deserialize p2 <*> deserialize p3 <*> deserialize p4 <*> deserialize p5 + where + (p1, rest) = splitWord s + (p2, rest') = splitWord rest + (p3, rest'') = splitWord rest' + (p4, p5) = splitWord rest'' + +splitWord :: String -> (String, String) +splitWord = separate isSpace + +{- When a program speaks a simple protocol over stdio, any other output + - to stdout (or anything that attempts to read from stdin) + - will mess up the protocol. To avoid that, close stdin, + - and duplicate stderr to stdout. Return two new handles + - that are duplicates of the original (stdin, stdout). -} +dupIoHandles :: IO (Handle, Handle) +dupIoHandles = do + readh <- hDuplicate stdin + writeh <- hDuplicate stdout + nullh <- openFile devNull ReadMode + nullh `hDuplicateTo` stdin + stderr `hDuplicateTo` stdout + return (readh, writeh) + +{- Reads a line, but to avoid super-long lines eating memory, returns + - Nothing if 32 kb have been read without seeing a '\n' + - + - If there is a '\r' before the '\n', it is removed, to support + - systems using "\r\n" at ends of lines + - + - This implementation is not super efficient, but as long as the Handle + - supports buffering, it avoids reading a character at a time at the + - syscall level. + - + - Throws isEOFError when no more input is available. + -} +getProtocolLine :: Handle -> IO (Maybe String) +getProtocolLine h = go (32768 :: Int) [] + where + go 0 _ = return Nothing + go n l = do + c <- hGetChar h + if c == '\n' + then return $ Just $ reverse $ + case l of + ('\r':rest) -> rest + _ -> l + else go (n-1) (c:l) diff --git a/Utility/SystemDirectory.hs b/Utility/SystemDirectory.hs index b9040fe..a7d60f9 100644 --- a/Utility/SystemDirectory.hs +++ b/Utility/SystemDirectory.hs @@ -1,4 +1,4 @@ -{- System.Directory without its conflicting isSymbolicLink +{- System.Directory without its conflicting isSymbolicLink and getFileSize. - - Copyright 2016 Joey Hess <id@joeyh.name> - diff --git a/Utility/ThreadScheduler.hs b/Utility/ThreadScheduler.hs index ef69ead..9ab94d9 100644 --- a/Utility/ThreadScheduler.hs +++ b/Utility/ThreadScheduler.hs @@ -15,6 +15,7 @@ module Utility.ThreadScheduler ( threadDelaySeconds, waitForTermination, oneSecond, + unboundDelay, ) where import Control.Monad diff --git a/Utility/TimeStamp.hs b/Utility/TimeStamp.hs new file mode 100644 index 0000000..b740d7b --- /dev/null +++ b/Utility/TimeStamp.hs @@ -0,0 +1,58 @@ +{- timestamp parsing and formatting + - + - Copyright 2015-2019 Joey Hess <id@joeyh.name> + - + - License: BSD-2-clause + -} + +module Utility.TimeStamp ( + parserPOSIXTime, + parsePOSIXTime, + formatPOSIXTime, +) where + +import Utility.Data + +import Data.Time.Clock.POSIX +import Data.Time +import Data.Ratio +import Control.Applicative +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as B8 +import qualified Data.Attoparsec.ByteString as A +import Data.Attoparsec.ByteString.Char8 (char, decimal, signed, isDigit_w8) + +{- Parses how POSIXTime shows itself: "1431286201.113452s" + - (The "s" is included for historical reasons and is optional.) + - Also handles the format with no decimal seconds. -} +parserPOSIXTime :: A.Parser POSIXTime +parserPOSIXTime = mkPOSIXTime + <$> signed decimal + <*> (declen <|> pure (0, 0)) + <* optional (char 's') + where + declen :: A.Parser (Integer, Int) + declen = do + _ <- char '.' + b <- A.takeWhile isDigit_w8 + let len = B.length b + d <- either fail pure $ + A.parseOnly (decimal <* A.endOfInput) b + return (d, len) + +parsePOSIXTime :: String -> Maybe POSIXTime +parsePOSIXTime s = eitherToMaybe $ + A.parseOnly (parserPOSIXTime <* A.endOfInput) (B8.pack s) + +{- This implementation allows for higher precision in a POSIXTime than + - supported by the system's Double, and avoids the complications of + - floating point. -} +mkPOSIXTime :: Integer -> (Integer, Int) -> POSIXTime +mkPOSIXTime n (d, dlen) + | n < 0 = fromIntegral n - fromRational r + | otherwise = fromIntegral n + fromRational r + where + r = d % (10 ^ dlen) + +formatPOSIXTime :: String -> POSIXTime -> String +formatPOSIXTime fmt t = formatTime defaultTimeLocale fmt (posixSecondsToUTCTime t) diff --git a/Utility/Tmp.hs b/Utility/Tmp.hs index 6ee592b..efb15bd 100644 --- a/Utility/Tmp.hs +++ b/Utility/Tmp.hs @@ -1,6 +1,6 @@ {- Temporary files. - - - Copyright 2010-2013 Joey Hess <id@joeyh.name> + - Copyright 2010-2020 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} @@ -14,22 +14,42 @@ module Utility.Tmp ( withTmpFile, withTmpFileIn, relatedTemplate, + openTmpFileIn, ) where import System.IO import System.FilePath import System.Directory import Control.Monad.IO.Class -import System.PosixCompat.Files +import System.IO.Error import Utility.Exception import Utility.FileSystemEncoding +import Utility.FileMode +import qualified Utility.RawFilePath as R type Template = String +{- This is the same as openTempFile, except when there is an + - error, it displays the template as well as the directory, + - to help identify what call was responsible. + -} +openTmpFileIn :: FilePath -> String -> IO (FilePath, Handle) +openTmpFileIn dir template = openTempFile dir template + `catchIO` decoraterrror + where + decoraterrror e = throwM $ + let loc = ioeGetLocation e ++ " template " ++ template + in annotateIOError e loc Nothing Nothing + {- Runs an action like writeFile, writing to a temp file first and - then moving it into place. The temp file is stored in the same - - directory as the final file to avoid cross-device renames. -} + - directory as the final file to avoid cross-device renames. + - + - While this uses a temp file, the file will end up with the same + - mode as it would when using writeFile, unless the writer action changes + - it. + -} viaTmp :: (MonadMask m, MonadIO m) => (FilePath -> v -> m ()) -> FilePath -> v -> m () viaTmp a file content = bracketIO setup cleanup use where @@ -37,14 +57,20 @@ viaTmp a file content = bracketIO setup cleanup use template = relatedTemplate (base ++ ".tmp") setup = do createDirectoryIfMissing True dir - openTempFile dir template + openTmpFileIn dir template cleanup (tmpfile, h) = do _ <- tryIO $ hClose h tryIO $ removeFile tmpfile use (tmpfile, h) = do + let tmpfile' = toRawFilePath tmpfile + -- Make mode the same as if the file were created usually, + -- not as a temp file. (This may fail on some filesystems + -- that don't support file modes well, so ignore + -- exceptions.) + _ <- liftIO $ tryIO $ R.setFileMode tmpfile' =<< defaultFileMode liftIO $ hClose h a tmpfile content - liftIO $ rename tmpfile file + liftIO $ R.rename tmpfile' (toRawFilePath file) {- Runs an action with a tmp file located in the system's tmp directory - (or in "." if there is none) then removes the file. -} @@ -54,11 +80,15 @@ withTmpFile template a = do withTmpFileIn tmpdir template a {- Runs an action with a tmp file located in the specified directory, - - then removes the file. -} + - then removes the file. + - + - Note that the tmp file will have a file mode that only allows the + - current user to access it. + -} withTmpFileIn :: (MonadIO m, MonadMask m) => FilePath -> Template -> (FilePath -> Handle -> m a) -> m a withTmpFileIn tmpdir template a = bracket create remove use where - create = liftIO $ openTempFile tmpdir template + create = liftIO $ openTmpFileIn tmpdir template remove (name, h) = liftIO $ do hClose h catchBoolIO (removeFile name >> return True) diff --git a/Utility/Tmp/Dir.hs b/Utility/Tmp/Dir.hs index c68ef86..904b65a 100644 --- a/Utility/Tmp/Dir.hs +++ b/Utility/Tmp/Dir.hs @@ -1,6 +1,6 @@ {- Temporary directories - - - Copyright 2010-2013 Joey Hess <id@joeyh.name> + - Copyright 2010-2022 Joey Hess <id@joeyh.name> - - License: BSD-2-clause -} @@ -63,8 +63,10 @@ removeTmpDir tmpdir = liftIO $ whenM (doesDirectoryExist tmpdir) $ do -- after a process has just written to it and exited. -- Because it's crap, presumably. So, ignore failure -- to delete the temp directory. - _ <- tryIO $ removeDirectoryRecursive tmpdir + _ <- tryIO $ go tmpdir return () #else - removeDirectoryRecursive tmpdir + go tmpdir #endif + where + go = removeDirectoryRecursive diff --git a/Utility/Url/Parse.hs b/Utility/Url/Parse.hs new file mode 100644 index 0000000..7fc952b --- /dev/null +++ b/Utility/Url/Parse.hs @@ -0,0 +1,63 @@ +{- Url parsing. + - + - Copyright 2011-2023 Joey Hess <id@joeyh.name> + - + - License: BSD-2-clause + -} + +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} + +module Utility.Url.Parse ( + parseURIPortable, + parseURIRelaxed, +) where + +import Network.URI +#ifdef mingw32_HOST_OS +import qualified System.FilePath.Windows as PW +#endif + +{- On unix this is the same as parseURI. But on Windows, + - it can parse urls such as file:///C:/path/to/file + - parseURI normally parses that as a path /C:/path/to/file + - and this simply removes the excess leading slash when there is a + - drive letter after it. -} +parseURIPortable :: String -> Maybe URI +#ifndef mingw32_HOST_OS +parseURIPortable = parseURI +#else +parseURIPortable s + | "file:" `isPrefixOf` s = do + u <- parseURI s + return $ case PW.splitDirectories (uriPath u) of + (p:d:_) | all PW.isPathSeparator p && PW.isDrive d -> + u { uriPath = dropWhile PW.isPathSeparator (uriPath u) } + _ -> u + | otherwise = parseURI s +#endif + +{- Allows for spaces and other stuff in urls, properly escaping them. -} +parseURIRelaxed :: String -> Maybe URI +parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $ + parseURIPortable $ escapeURIString isAllowedInURI s + +{- Some characters like '[' are allowed in eg, the address of + - an uri, but cannot appear unescaped further along in the uri. + - This handles that, expensively, by successively escaping each character + - from the back of the url until the url parses. + -} +parseURIRelaxed' :: String -> Maybe URI +parseURIRelaxed' s = go [] (reverse s) + where + go back [] = parseURI back + go back (c:cs) = case parseURI (escapeURIString isAllowedInURI (reverse (c:cs)) ++ back) of + Just u -> Just u + Nothing -> go (escapeURIChar escapemore c ++ back) cs + + escapemore '[' = False + escapemore ']' = False + escapemore c = isAllowedInURI c diff --git a/Utility/UserInfo.hs b/Utility/UserInfo.hs index 17ce8db..827229d 100644 --- a/Utility/UserInfo.hs +++ b/Utility/UserInfo.hs @@ -19,31 +19,32 @@ import Utility.Exception #ifndef mingw32_HOST_OS import Utility.Data import Control.Applicative +import System.Posix.User +#if MIN_VERSION_unix(2,8,0) +import System.Posix.User.ByteString (UserEntry) +#endif #endif -import System.PosixCompat import Prelude {- Current user's home directory. - - getpwent will fail on LDAP or NIS, so use HOME if set. -} myHomeDir :: IO FilePath -myHomeDir = either giveup return =<< myVal env homeDirectory - where +myHomeDir = either giveup return =<< #ifndef mingw32_HOST_OS - env = ["HOME"] + myVal ["HOME"] homeDirectory #else - env = ["USERPROFILE", "HOME"] -- HOME is used in Cygwin + myVal ["USERPROFILE", "HOME"] -- HOME is used in Cygwin #endif {- Current user's user name. -} myUserName :: IO (Either String String) -myUserName = myVal env userName - where +myUserName = #ifndef mingw32_HOST_OS - env = ["USER", "LOGNAME"] + myVal ["USER", "LOGNAME"] userName #else - env = ["USERNAME", "USER", "LOGNAME"] + myVal ["USERNAME", "USER", "LOGNAME"] #endif myUserGecos :: IO (Maybe String) @@ -54,16 +55,20 @@ myUserGecos = return Nothing myUserGecos = eitherToMaybe <$> myVal [] userGecos #endif +#ifndef mingw32_HOST_OS myVal :: [String] -> (UserEntry -> String) -> IO (Either String String) myVal envvars extract = go envvars where go [] = either (const $ envnotset) (Right . extract) <$> get go (v:vs) = maybe (go vs) (return . Right) =<< getEnv v -#ifndef mingw32_HOST_OS -- This may throw an exception if the system doesn't have a -- passwd file etc; don't let it crash. get = tryNonAsync $ getUserEntryForID =<< getEffectiveUserID #else - get = return envnotset +myVal :: [String] -> IO (Either String String) +myVal envvars = go envvars + where + go [] = return envnotset + go (v:vs) = maybe (go vs) (return . Right) =<< getEnv v #endif envnotset = Left ("environment not set: " ++ show envvars) |