diff options
author | Joey Hess <joeyh@joeyh.name> | 2023-08-14 12:06:32 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2023-08-14 12:12:52 -0400 |
commit | edf83982be214f3c839fab9b659f645de53a9100 (patch) | |
tree | bef06cb750379c6d7942fc13b13fcb328201354c /Utility/Url | |
parent | f0cd3a2a3758ddcd2f0900c16bdc1fb80bbd6e92 (diff) | |
download | git-repair-edf83982be214f3c839fab9b659f645de53a9100.tar.gz |
merge from git-annex
Support building with unix-compat 0.7
Diffstat (limited to 'Utility/Url')
-rw-r--r-- | Utility/Url/Parse.hs | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/Utility/Url/Parse.hs b/Utility/Url/Parse.hs new file mode 100644 index 0000000..7fc952b --- /dev/null +++ b/Utility/Url/Parse.hs @@ -0,0 +1,63 @@ +{- Url parsing. + - + - Copyright 2011-2023 Joey Hess <id@joeyh.name> + - + - License: BSD-2-clause + -} + +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} + +module Utility.Url.Parse ( + parseURIPortable, + parseURIRelaxed, +) where + +import Network.URI +#ifdef mingw32_HOST_OS +import qualified System.FilePath.Windows as PW +#endif + +{- On unix this is the same as parseURI. But on Windows, + - it can parse urls such as file:///C:/path/to/file + - parseURI normally parses that as a path /C:/path/to/file + - and this simply removes the excess leading slash when there is a + - drive letter after it. -} +parseURIPortable :: String -> Maybe URI +#ifndef mingw32_HOST_OS +parseURIPortable = parseURI +#else +parseURIPortable s + | "file:" `isPrefixOf` s = do + u <- parseURI s + return $ case PW.splitDirectories (uriPath u) of + (p:d:_) | all PW.isPathSeparator p && PW.isDrive d -> + u { uriPath = dropWhile PW.isPathSeparator (uriPath u) } + _ -> u + | otherwise = parseURI s +#endif + +{- Allows for spaces and other stuff in urls, properly escaping them. -} +parseURIRelaxed :: String -> Maybe URI +parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $ + parseURIPortable $ escapeURIString isAllowedInURI s + +{- Some characters like '[' are allowed in eg, the address of + - an uri, but cannot appear unescaped further along in the uri. + - This handles that, expensively, by successively escaping each character + - from the back of the url until the url parses. + -} +parseURIRelaxed' :: String -> Maybe URI +parseURIRelaxed' s = go [] (reverse s) + where + go back [] = parseURI back + go back (c:cs) = case parseURI (escapeURIString isAllowedInURI (reverse (c:cs)) ++ back) of + Just u -> Just u + Nothing -> go (escapeURIChar escapemore c ++ back) cs + + escapemore '[' = False + escapemore ']' = False + escapemore c = isAllowedInURI c |