From 13c408d2295597540f0b2dfb6f7b86e739876c90 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Mon, 12 Sep 2016 22:35:47 -0400
Subject: implement client-server Proof Of Work

Mashed up an argon2-based PoW with token buckets and bloom filters.

This is intended to prevent a few abuses including:

* Using a keysafe server for general file storage, by storing a whole
  lot of chunks.
* An attacker guessing names that people will use, and uploading junk
  to keysafe servers under those names, to make it harder for others
  to use keysafe later.
* An attacker trying to guess the names used for objects on keysafe
  servers in order to download them and start password cracking.
  (As a second level of defense, since the name generation hash is
  expensive already.)

Completely untested, but it builds!

This commit was sponsored by Andreas on Patreon.
---
 Benchmark.hs        |  82 +++++++++++++++++++++
 ByteStrings.hs      |  30 ++++++++
 Encryption.hs       |  31 ++------
 ExpensiveHash.hs    |  61 ----------------
 HTTP.hs             |  39 +++++++---
 HTTP/Client.hs      |   6 +-
 HTTP/ProofOfWork.hs | 116 +++++++++++++++++++++++++++---
 HTTP/RateLimit.hs   | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 HTTP/Server.hs      |  21 +++---
 Storage/Network.hs  |  46 ++++++++----
 TODO                |   2 +-
 Tunables.hs         |  17 +++++
 keysafe.cabal       |   5 ++
 keysafe.hs          |   2 +-
 stack.yaml          |   1 +
 15 files changed, 528 insertions(+), 134 deletions(-)
 create mode 100644 Benchmark.hs
 create mode 100644 ByteStrings.hs
 create mode 100644 HTTP/RateLimit.hs

diff --git a/Benchmark.hs b/Benchmark.hs
new file mode 100644
index 0000000..21b7ce3
--- /dev/null
+++ b/Benchmark.hs
@@ -0,0 +1,82 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+{- Copyright 2016 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+module Benchmark where
+
+import Types
+import Tunables
+import ExpensiveHash
+import Cost
+import Serialization ()
+import qualified Data.ByteString.UTF8 as BU8
+import qualified Crypto.Argon2 as Argon2
+import Data.Time.Clock
+import Control.DeepSeq
+import Control.Monad
+import Data.Monoid
+import Data.Maybe
+
+benchmarkExpensiveHash :: Int -> ExpensiveHashTunable -> IO (BenchmarkResult (Cost CreationOp))
+benchmarkExpensiveHash rounds tunables =
+    benchmarkExpensiveHash' rounds tunables (getexpected tunables)
+  where
+    getexpected (UseArgon2 cost _) = mapCost (* fromIntegral rounds) cost
+
+benchmarkExpensiveHash' :: Int -> ExpensiveHashTunable -> Cost op -> IO (BenchmarkResult (Cost op))
+benchmarkExpensiveHash' rounds tunables@(UseArgon2 _ hashopts) expected = do
+    numcores <- fromIntegral . fromMaybe (error "Unknown number of physical cores.")
+        <$> getNumCores
+    start <- getCurrentTime
+    forM_ [1..rounds] $ \n -> do
+        -- Must vary the data being hashed to avoid laziness
+        -- caching hash results.
+        let base = BU8.fromString (show n)
+        let ExpensiveHash _ t = expensiveHash tunables
+            (Salt (GpgKey (KeyId (base <> "dummy"))))
+            (base <> "himom")
+        t `deepseq` return ()
+    end <- getCurrentTime
+    let diff = floor $ end `diffUTCTime` start
+    let maxthreads = Argon2.hashParallelism hashopts
+    let actual = CPUCost (Seconds diff) (Divisibility $ fromIntegral maxthreads)
+    -- The expected cost is for a single core, so adjust it
+    -- based on the number of cores, up to a maximum of the number
+    -- of threads that the hash is configured to use.
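+    -- (For example, with hashParallelism 4 on a machine with only
+    -- 2 physical cores, usedcores is 2, so the single-core expected
+    -- cost is divided by 2 rather than by 4.)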
+    let usedcores = min maxthreads numcores
+    let adjustedexpected = mapCost (`div` fromIntegral usedcores) expected
+    return $ BenchmarkResult
+        { expectedBenchmark = adjustedexpected
+        , actualBenchmark = actual
+        }
+
+benchmarkTunables :: Tunables -> IO ()
+benchmarkTunables tunables = do
+    putStrLn "/proc/cpuinfo:"
+    putStrLn =<< readFile "/proc/cpuinfo"
+
+    putStrLn $ "Benchmarking 16 rounds of proof of work hash..."
+    print =<< benchmarkExpensiveHash 16 (proofOfWorkHashTunable 0)
+
+    -- Rather than run all 256 rounds of this hash, which would
+    -- probably take on the order of 1 hour, run only 16, and scale
+    -- the expected cost accordingly.
+    let normalrounds = fromIntegral $
+        256 * randomSaltBytes (keyEncryptionKeyTunable tunables)
+    putStrLn $ "Benchmarking 16/" ++ show normalrounds ++ " rounds of key encryption key hash..."
+    r <- benchmarkExpensiveHash' 16
+        (keyEncryptionKeyHash $ keyEncryptionKeyTunable tunables)
+        (mapCost (`div` (normalrounds `div` 16)) $ randomSaltBytesBruteForceCost $ keyEncryptionKeyTunable tunables)
+    print r
+    putStrLn $ "Estimated time for " ++ show normalrounds ++ " rounds of key encryption key hash..."
+    print $ BenchmarkResult
+        { expectedBenchmark = mapCost (* 16) (expectedBenchmark r)
+        , actualBenchmark = mapCost (* 16) (actualBenchmark r)
+        }
+
+    putStrLn "Benchmarking 1 round of name generation hash..."
+    print =<< benchmarkExpensiveHash 1
+        (nameGenerationHash $ nameGenerationTunable tunables)

diff --git a/ByteStrings.hs b/ByteStrings.hs
new file mode 100644
index 0000000..02e22ab
--- /dev/null
+++ b/ByteStrings.hs
@@ -0,0 +1,30 @@
+{- Copyright 2016 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+module ByteStrings where
+
+import qualified Data.ByteString as B
+
+allByteStringsOfLength :: Int -> [B.ByteString]
+allByteStringsOfLength = go []
+  where
+    go ws n
+        | n == 0 = return (B.pack ws)
+        | otherwise = do
+            w <- [0..255]
+            go (w:ws) (n-1)
+
+-- | Contains every possible byte string, with shorter ones first.
+allByteStrings :: [B.ByteString]
+allByteStrings = concatMap allByteStringsOfLength [1..]
+
+chunkByteString :: Int -> B.ByteString -> [B.ByteString]
+chunkByteString n = go []
+  where
+    go cs b
+        | B.length b <= n = reverse (b:cs)
+        | otherwise =
+            let (h, t) = B.splitAt n b
+            in go (h:cs) t

diff --git a/Encryption.hs b/Encryption.hs
index b084c27..12edbc6 100644
--- a/Encryption.hs
+++ b/Encryption.hs
@@ -12,9 +12,11 @@ import Types
 import Tunables
 import Cost
 import ExpensiveHash
+import ByteStrings
 import Data.Monoid
 import Data.Maybe
 import Data.Word
+import Control.Monad
 import qualified Raaz
 import qualified Raaz.Cipher.AES as Raaz
 import qualified Raaz.Cipher.Internal as Raaz
@@ -31,7 +33,7 @@ cipher = Raaz.aes256cbc
 
 encrypt :: Tunables -> KeyEncryptionKey -> SecretKey -> EncryptedSecretKey
 encrypt tunables kek (SecretKey secret) =
-    EncryptedSecretKey (chunk (objectSize tunables) b) (keyBruteForceCalc kek)
+    EncryptedSecretKey (chunkByteString (objectSize tunables) b) (keyBruteForceCalc kek)
   where
    -- Raaz does not seem to provide a high-level interface
    -- for AES encryption, so use unsafeEncrypt. The use of
@@ -138,24 +140,6 @@ candidateKeyEncryptionKeys tunables name password =
    saltprefixes = allByteStringsOfLength $
        randomSaltBytes $ keyEncryptionKeyTunable tunables
 
-allByteStringsOfLength :: Int -> [B.ByteString]
-allByteStringsOfLength = go []
-  where
-    go ws n
-        | n == 0 = return (B.pack ws)
-        | otherwise = do
-            w <- [0..255]
-            go (w:ws) (n-1)
-
-chunk :: Int -> B.ByteString -> [B.ByteString]
-chunk n = go []
-  where
-    go cs b
-        | B.length b <= n = reverse (b:cs)
-        | otherwise =
-            let (h, t) = B.splitAt n b
-            in go (h:cs) t
-
 -- Use the sha256 of the name (truncated) as the IV.
 genIV :: Name -> Raaz.IV
 genIV (Name name) =
@@ -168,13 +152,10 @@ genIV (Name name) =
 type SaltPrefix = B.ByteString
 
 genRandomSaltPrefix :: Raaz.SystemPRG -> Tunables -> IO SaltPrefix
-genRandomSaltPrefix prg tunables = go []
-    (randomSaltBytes $ keyEncryptionKeyTunable tunables)
+genRandomSaltPrefix prg tunables = B.pack <$> replicateM n randbyte
   where
-    go ws 0 = return (B.pack ws)
-    go ws n = do
-        b <- Raaz.random prg :: IO Word8
-        go (b:ws) (n-1)
+    n = randomSaltBytes $ keyEncryptionKeyTunable tunables
+    randbyte = Raaz.random prg :: IO Word8
 
 instance Raaz.Random Word8

diff --git a/ExpensiveHash.hs b/ExpensiveHash.hs
index b46b23c..6fab15c 100644
--- a/ExpensiveHash.hs
+++ b/ExpensiveHash.hs
@@ -7,20 +7,14 @@
 module ExpensiveHash where
 
-import Types
 import Tunables
 import Cost
 import Serialization ()
 import qualified Data.Text as T
 import qualified Data.ByteString as B
-import qualified Data.ByteString.UTF8 as BU8
 import qualified Crypto.Argon2 as Argon2
 import Raaz.Core.Encode
-import Data.Time.Clock
-import Control.DeepSeq
-import Control.Monad
 import Data.Monoid
-import Data.Maybe
 
 -- | A hash that is expensive to calculate.
 --
@@ -46,58 +40,3 @@ expensiveHash (UseArgon2 cost opts) (Salt s) b = ExpensiveHash cost $
    argonsalt =
        let sb = toByteString s
        in sb <> B.replicate (8 - B.length sb ) 32
-
-benchmarkExpensiveHash :: Int -> ExpensiveHashTunable -> Cost op -> IO (BenchmarkResult (Cost op))
-benchmarkExpensiveHash rounds tunables@(UseArgon2 _ hashopts) expected = do
-    numcores <- fromIntegral . fromMaybe (error "Unknown number of physical cores.")
-        <$> getNumCores
-    start <- getCurrentTime
-    forM_ [1..rounds] $ \n -> do
-        -- Must vary the data being hashed to avoid laziness
-        -- caching hash results.
-        let base = BU8.fromString (show n)
-        let ExpensiveHash _ t = expensiveHash tunables
-            (Salt (GpgKey (KeyId (base <> "dummy"))))
-            (base <> "himom")
-        t `deepseq` return ()
-    end <- getCurrentTime
-    let diff = floor $ end `diffUTCTime` start
-    let maxthreads = Argon2.hashParallelism hashopts
-    let actual = CPUCost (Seconds diff) (Divisibility $ fromIntegral maxthreads)
-    -- The expected cost is for a single core, so adjust it
-    -- based on the number of cores, up to a maximum of the number
-    -- of threads that the hash is configred to use.
-    let usedcores = min maxthreads numcores
-    let adjustedexpected = mapCost (`div` fromIntegral usedcores) expected
-    return $ BenchmarkResult
-        { expectedBenchmark = adjustedexpected
-        , actualBenchmark = actual
-        }
-
-benchmarkTunables :: Tunables -> IO ()
-benchmarkTunables tunables = do
-    putStrLn "/proc/cpuinfo:"
-    putStrLn =<< readFile "/proc/cpuinfo"
-
-    -- Rather than run all 256 rounds of this hash, which would
-    -- probably take on the order of 1 hour, run only 16, and scale
-    -- the expected cost accordingly.
-    let normalrounds = fromIntegral $
-        256 * randomSaltBytes (keyEncryptionKeyTunable tunables)
-    putStrLn $ "Benchmarking 16/" ++ show normalrounds ++ " rounds of key encryption key hash..."
-    r <- benchmarkExpensiveHash 16
-        (keyEncryptionKeyHash $ keyEncryptionKeyTunable tunables)
-        (mapCost (`div` (normalrounds `div` 16)) $ randomSaltBytesBruteForceCost $ keyEncryptionKeyTunable tunables)
-    print r
-    putStrLn $ "Estimated time for " ++ show normalrounds ++ " rounds of key encryption key hash..."
-    print $ BenchmarkResult
-        { expectedBenchmark = mapCost (* 16) (expectedBenchmark r)
-        , actualBenchmark = mapCost (* 16) (actualBenchmark r)
-        }
-
-    putStrLn "Benchmarking 1 round of name generation hash..."
-    print =<< benchmarkExpensiveHash 1
-        (nameGenerationHash $ nameGenerationTunable tunables)
-        (getexpected $ nameGenerationHash $ nameGenerationTunable tunables)
-  where
-    getexpected (UseArgon2 cost _) = cost

diff --git a/HTTP.hs b/HTTP.hs
index db9ef4d..702a806 100644
--- a/HTTP.hs
+++ b/HTTP.hs
@@ -20,40 +20,47 @@ import Servant.API
 import Data.Text
 import Data.Aeson.Types
 import GHC.Generics hiding (V1)
+import qualified Data.Text as T
 import qualified Data.Text.Encoding as T
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as L
 import qualified Raaz.Core.Encode as Raaz
+import Data.Monoid
+import Prelude
 
 -- | Keysafe's http API
 type HttpAPI =
    "keysafe" :> V1 :> "motd" :> Get '[JSON] Motd
    :<|> "keysafe" :> V1 :> "objects" :> ObjectIdent :> POWParam
-        :> Get '[JSON] (ProofOfWorkRequirement StorableObject)
+        :> Get '[JSON] (POWGuarded StorableObject)
    :<|> "keysafe" :> V1 :> "objects" :> ObjectIdent :> POWParam
        :> ReqBody '[OctetStream] StorableObject
-        :> Put '[JSON] (ProofOfWorkRequirement StoreResult)
-    :<|> "keysafe" :> V1 :> "stats" :> "countobjects" :> POWParam
-        :> Get '[JSON] (ProofOfWorkRequirement CountResult)
+        :> Put '[JSON] (POWGuarded StoreResult)
+    :<|> "keysafe" :> V1 :> "stats" :> "countobjects"
+        :> Get '[JSON] CountResult
 
 type V1 = "v1"
 
 newtype Motd = Motd Text
    deriving (Generic)
 
+data POWGuarded t
+    = Result t
+    | NeedProofOfWork ProofOfWorkRequirement
+    deriving (Generic)
+
 type POWParam = QueryParam "proofofwork" ProofOfWork
 type ObjectIdent = Capture "ident" StorableObjectIdent
 
 instance ToJSON Motd
 instance FromJSON Motd
-instance ToJSON t => ToJSON (ProofOfWorkRequirement t)
-instance FromJSON t => FromJSON (ProofOfWorkRequirement t)
-
-instance FromHttpApiData ProofOfWork where
-    parseUrlPiece = Right . ProofOfWork
-instance ToHttpApiData ProofOfWork where
-    toUrlPiece (ProofOfWork t) = t
+instance ToJSON t => ToJSON (POWGuarded t)
+instance FromJSON t => FromJSON (POWGuarded t)
+instance ToJSON ProofOfWorkRequirement
+instance FromJSON ProofOfWorkRequirement
+instance ToJSON RandomSalt
+instance FromJSON RandomSalt
 
 -- StorableObjectIdent contains a hash, which is valid UTF-8.
 instance ToHttpApiData StorableObjectIdent where
@@ -75,6 +82,16 @@ instance FromJSON StorableObject where
    parseJSON (Object v) = StorableObject <$> (unb64 =<< v .: "data")
    parseJSON invalid = typeMismatch "StorableObject" invalid
 
+-- ProofOfWork contains an arbitrary bytestring and is base64 encoded in
+-- the query string.
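+-- For example (hypothetical values), ProofOfWork "\SOH\STX" (RandomSalt "42")
+-- would appear in the query string as "42:AQI=".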
+instance ToHttpApiData ProofOfWork where
+    toUrlPiece (ProofOfWork b (RandomSalt s)) = s <> ":" <> b64 b
+instance FromHttpApiData ProofOfWork where
+    parseUrlPiece t = do
+        let (s, rest) = T.break (== ':') t
+        b <- unb64 (T.drop 1 rest)
+        return (ProofOfWork b (RandomSalt s))
+
 b64 :: B.ByteString -> Text
 b64 v = T.decodeUtf8 $ Raaz.toByteString (Raaz.encode v :: Raaz.Base64)

diff --git a/HTTP/Client.hs b/HTTP/Client.hs
index 0c28005..8f81db4 100644
--- a/HTTP/Client.hs
+++ b/HTTP/Client.hs
@@ -18,7 +18,7 @@ httpAPI :: Proxy HttpAPI
 httpAPI = Proxy
 
 motd :: Manager -> BaseUrl -> ClientM Motd
-getObject :: StorableObjectIdent -> Maybe ProofOfWork -> Manager -> BaseUrl -> ClientM (ProofOfWorkRequirement StorableObject)
-putObject :: StorableObjectIdent -> Maybe ProofOfWork -> StorableObject -> Manager -> BaseUrl -> ClientM (ProofOfWorkRequirement StoreResult)
-countObjects :: Maybe ProofOfWork -> Manager -> BaseUrl -> ClientM (ProofOfWorkRequirement CountResult)
+getObject :: StorableObjectIdent -> Maybe ProofOfWork -> Manager -> BaseUrl -> ClientM (POWGuarded StorableObject)
+putObject :: StorableObjectIdent -> Maybe ProofOfWork -> StorableObject -> Manager -> BaseUrl -> ClientM (POWGuarded StoreResult)
+countObjects :: Manager -> BaseUrl -> ClientM CountResult
 
 motd :<|> getObject :<|> putObject :<|> countObjects = client httpAPI

diff --git a/HTTP/ProofOfWork.hs b/HTTP/ProofOfWork.hs
index 45cc96d..04aec57 100644
--- a/HTTP/ProofOfWork.hs
+++ b/HTTP/ProofOfWork.hs
@@ -4,19 +4,117 @@
 -}
 
 {-# LANGUAGE DeriveGeneric #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
 
 module HTTP.ProofOfWork where
 
 import Types
-import qualified Data.Text as T
+import Types.Cost
+import ExpensiveHash
+import Tunables
+import ByteStrings
 import GHC.Generics
+import qualified Data.Text as T
+import qualified Data.ByteString as B
+import Data.Text.Encoding (encodeUtf8)
+import Raaz.Core.Encode
+import qualified Raaz
+import Data.BloomFilter.Hash
+import Control.Monad
+import Data.Word
+import Data.Bits
+import Data.Monoid
+import Prelude
+
+-- | A value that the client has to do some work to calculate.
+data ProofOfWork = ProofOfWork B.ByteString RandomSalt
+    deriving (Show)
+
+data ProofOfWorkRequirement = ProofOfWorkRequirement
+    { leadingZeros :: Int
+    , addedArgon2Iterations :: Word32
+    , randomSalt :: RandomSalt
+    }
+    deriving (Generic, Show)
+
+-- | Using Text and not ByteString so that ProofOfWorkRequirement can have a
+-- JSON instance.
+newtype RandomSalt = RandomSalt { fromRandomSalt :: T.Text }
+    deriving (Generic, Show, Eq)
+
+instance Hashable RandomSalt where
+    hashIO32 = hashIO32 . encodeUtf8 . fromRandomSalt
+    hashIO64 = hashIO64 . encodeUtf8 . fromRandomSalt
+
+-- | Servers should never demand a proof of work that takes longer than
+-- this to generate. Note that if a server changes its mind and doubles
+-- the proof of work, a client counts that cumulatively. So, a server
+-- should avoid any single proof of work requirement taking more than half
+-- this long.
+maxProofOfWork :: Seconds
+maxProofOfWork = Seconds (16*60)
+
+-- | How long it will take to generate a proof of work meeting the
+-- requirement, maximum.
+--
+-- Of course, a client can get lucky and find a value that works
+-- on the very first try. On average, the client will need to work for half
+-- as long as the returned number of Seconds.
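+--
+-- For example, with the base proof of work hash tuned to 4 seconds and
+-- leadingZeros = 4, this is 2^4 * 4 = 64 seconds, for an average of
+-- 32 seconds of work.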
+generationTime :: ProofOfWorkRequirement -> Seconds
+generationTime req =
+    let UseArgon2 (CPUCost (Seconds s) _) _ = proofOfWorkHashTunable (addedArgon2Iterations req)
+    in Seconds ((2^(leadingZeros req)) * s)
+
+mkProofOfWorkRequirement :: Seconds -> Maybe (RandomSalt -> ProofOfWorkRequirement)
+mkProofOfWorkRequirement (Seconds n)
+    | lz < 1 = Nothing
+    | otherwise = Just $ ProofOfWorkRequirement lz its
+  where
+    lz = floor (logBase 2 (max 1 (fromIntegral n / fromIntegral s)) :: Double)
+    UseArgon2 (CPUCost (Seconds s) _) _ = proofOfWorkHashTunable its
+    its = 0
+
+mkRandomSalt :: IO RandomSalt
+mkRandomSalt = do
+    prg <- Raaz.newPRG () :: IO Raaz.SystemPRG
+    rs <- replicateM 16 (Raaz.random prg :: IO Word8)
+    return $ RandomSalt $ T.pack $ concatMap show rs
+
+instance Raaz.Random Word8
+
+isValidProofOfWork :: ProofOfWork -> ProofOfWorkRequirement -> StorableObjectIdent -> Bool
+isValidProofOfWork (ProofOfWork pow rsalt) req (StorableObjectIdent n) =
+    samesalts && enoughzeros
+  where
+    samesalts = rsalt == randomSalt req
+    enoughzeros = all (== False) (take (leadingZeros req) (setBits b))
+    tunable = proofOfWorkHashTunable (addedArgon2Iterations req)
+    salt = Salt $ POWSalt $
+        encodeUtf8 (fromRandomSalt (randomSalt req)) <> pow
+    ExpensiveHash _ hash = expensiveHash tunable salt n
+    -- Since expensiveHash generates an ascii encoded hash that
+    -- includes the parameters, take the sha256 of it to get the
+    -- bytestring that is checked for the necessary number
+    -- of leading 0 bits.
+    b = Raaz.toByteString $ Raaz.sha256 $ encodeUtf8 hash
+
+setBits :: B.ByteString -> [Bool]
+setBits = concatMap go . B.unpack
+  where
+    go byte = map (uncurry testBit) (zip (repeat byte) [0..7])
+
+newtype POWSalt = POWSalt B.ByteString
 
-data ProofOfWorkRequirement t
-    = Result t
-    | ProofOfWorkRequirement
-        { leadingZeros :: Int
-        , argon2Iterations :: Int
-        }
-    deriving (Generic)
+instance Encodable POWSalt where
+    toByteString (POWSalt n) = n
+    fromByteString = Just . POWSalt
 
-newtype ProofOfWork = ProofOfWork T.Text
+genProofOfWork :: ProofOfWorkRequirement -> StorableObjectIdent -> ProofOfWork
+genProofOfWork req i = go allByteStrings
+  where
+    go [] = error "failed to generate Proof Of Work. This should be impossible!"
+    go (b:bs)
+        | isValidProofOfWork candidate req i = candidate
+        | otherwise = go bs
+      where
+        candidate = ProofOfWork b (randomSalt req)

diff --git a/HTTP/RateLimit.hs b/HTTP/RateLimit.hs
new file mode 100644
index 0000000..9153664
--- /dev/null
+++ b/HTTP/RateLimit.hs
@@ -0,0 +1,203 @@
+{- Copyright 2016 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+module HTTP.RateLimit where
+
+import Types
+import Types.Cost
+import HTTP
+import HTTP.ProofOfWork
+import Servant
+import Control.Concurrent
+import Control.Concurrent.STM
+import Control.Concurrent.TokenBucket
+import qualified Data.BloomFilter.Mutable as BloomFilter
+import qualified Data.BloomFilter.Hash as BloomFilter
+import Data.BloomFilter.Easy (suggestSizing)
+import Control.Monad.ST
+import System.IO
+import Data.Maybe
+import Data.Word
+import Control.Monad.IO.Class
+
+-- | A rate limiter is a series of buckets. Each bucket has a
+-- successively more difficult proof of work access requirement.
+--
+-- To guard against DOS attacks that reuse the same proof of work,
+-- RandomSalt values are used, and bloom filters keep track of
+-- the ones that have been assigned and used.
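+--
+-- Two generations of the bloom filters are kept; on rotation the
+-- current filters become the old generation, so salts from before
+-- the rotation are still remembered, and replays of them are rejected.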
+data RateLimiter = RateLimiter
+    { buckets :: TMVar [Bucket]
+    , assignedRandomSalts :: BloomFilter
+    , assignedRandomSaltsOld :: BloomFilter
+    , usedRandomSalts :: BloomFilter
+    , usedRandomSaltsOld :: BloomFilter
+    , numRandomSalts :: TMVar Int
+    , accessLock :: TMVar ()
+    }
+
+type BloomFilter = TMVar (BloomFilter.MBloom RealWorld RandomSalt)
+
+-- | Buckets fill up at a fixed rate (which can be tuned by the server
+-- operator), and accessing a bucket removes one unit from it.
+data Bucket = Bucket
+    { tokenBucket :: TokenBucket
+    , mkProofReq :: Maybe (RandomSalt -> ProofOfWorkRequirement)
+    }
+
+newRateLimiter :: IO RateLimiter
+newRateLimiter = RateLimiter
+    <$> (newTMVarIO =<< mkbuckets maxProofOfWork [])
+    <*> mkBloomFilter
+    <*> mkBloomFilter
+    <*> mkBloomFilter
+    <*> mkBloomFilter
+    <*> newTMVarIO 0
+    <*> newTMVarIO ()
+  where
+    -- The last bucket takes half of maxProofOfWork to access, and
+    -- each earlier bucket halves that time, down to the first bucket,
+    -- which needs no proof of work. This ensures that in the edge case
+    -- where a client keeps getting bumped up to more and more expensive
+    -- buckets, it doesn't need to do more than maxProofOfWork total work.
+    mkbuckets (Seconds n) bs
+        | n <= 0 = return bs
+        | otherwise = do
+            let s = Seconds (n `div` 2)
+            let mkreq = mkProofOfWorkRequirement s
+            b <- Bucket
+                <$> newTokenBucket
+                <*> pure mkreq
+            case mkreq of
+                Nothing -> return (b:bs)
+                Just _ -> mkbuckets s (b:bs)
+
+mkBloomFilter :: IO BloomFilter
+mkBloomFilter = do
+    b <- stToIO $ BloomFilter.new (BloomFilter.cheapHashes bloomhashes) bloomsize
+    newTMVarIO b
+  where
+    -- Size the bloom filter to hold 1 million items, with a false
+    -- positive rate of 1 in 100 thousand. This will use around 32 mb
+    -- of memory.
+    (bloomhashes, bloomsize) = suggestSizing bloomMaxSize (1/100000)
+
+-- | Maximum number of RandomSalts that can be stored in a bloom filter
+-- without the false positive rate getting bad.
+bloomMaxSize :: Int
+bloomMaxSize = 1000000
+
+-- | Size of the bucket. This allows a burst of accesses after an idle
+-- period, which is especially useful when retrieving keys that were
+-- split into multiple chunks. However, setting this too high lets clients
+-- cheaply store lots of data, so keep the objectSize in mind.
+burstSize :: Word64
+burstSize = 4 -- allow 128 kb of data to be stored/retrieved w/o proof of work
+
+-- | Rate that the bucket is filled.
+fillRate :: Word64
+fillRate = 60000000 -- 1 token per minute
+
+-- A request is tried in each bucket, in turn, that its proof of work
+-- allows access to. If all accessible token buckets are empty, generate
+-- a new ProofOfWorkRequirement for the client.
+--
+-- If all buckets are tried and are empty, we must be very overloaded.
+-- In this case, the request is still processed, since the client has done
+-- quite a lot of work.
+rateLimit :: RateLimiter -> Maybe ProofOfWork -> StorableObjectIdent -> Handler a -> Handler (POWGuarded a)
+rateLimit ratelimiter mpow i a = do
+    validsalt <- liftIO $ checkValidSalt ratelimiter mpow
+    bs <- liftIO $ atomically $ readTMVar (buckets ratelimiter)
+    if validsalt
+        then go bs
+        else assignWork ratelimiter bs
+  where
+    go [] = do
+        liftIO $ hPutStrLn stderr "** warning: all token buckets are empty; possible DOS attack?"
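+        -- Process the request anyway; the client has already done
+        -- as much work as can reasonably be demanded.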
+        Result <$> a
+    go (b:bs) = case mkProofReq b of
+        Nothing -> checkbucket b bs
+        Just mkreq -> case mpow of
+            Nothing -> assignWork ratelimiter (b:bs)
+            Just pow@(ProofOfWork _ salt) ->
+                if isValidProofOfWork pow (mkreq salt) i
+                    then checkbucket b bs
+                    else assignWork ratelimiter (b:bs)
+    checkbucket b bs = do
+        allowed <- liftIO $ tokenBucketTryAlloc (tokenBucket b)
+            burstSize fillRate 1
+        if allowed
+            then Result <$> a
+            else go bs
+
+checkValidSalt :: RateLimiter -> Maybe ProofOfWork -> IO Bool
+checkValidSalt _ Nothing = return True
+checkValidSalt rl (Just (ProofOfWork _ salt)) = do
+    assigned <- iselem assignedRandomSalts
+    oldassigned <- iselem assignedRandomSaltsOld
+    used <- iselem usedRandomSalts
+    oldused <- iselem usedRandomSaltsOld
+    if assigned && not oldassigned && not used && not oldused
+        then do
+            withBloomFilter rl usedRandomSalts
+                (`BloomFilter.insert` salt)
+            return True
+        else return False
+  where
+    iselem f = withBloomFilter rl f (BloomFilter.elem salt)
+
+assignWork :: RateLimiter -> [Bucket] -> Handler (POWGuarded a)
+assignWork ratelimiter bs = case mapMaybe mkProofReq bs of
+    [] -> throwError err404
+    (mkreq:_) -> liftIO $ do
+        -- 10 second pause here, with a lock held so that only
+        -- this thread can run. This prevents an attacker
+        -- flooding requests that cause new random salts to be
+        -- assigned, in order to fill up the bloom table and cause
+        -- salts assigned to other clients to be rejected.
+        -- Since the bloom filters hold 1 million salts,
+        -- the attacker would need to send requests for over 10
+        -- hours to force a bloom filter rotation, so would not
+        -- impact many users.
+        atomically $ takeTMVar (accessLock ratelimiter)
+        threadDelay 10000000
+        atomically $ putTMVar (accessLock ratelimiter) ()
+
+        salt <- liftIO mkRandomSalt
+        withBloomFilter ratelimiter assignedRandomSalts
+            (`BloomFilter.insert` salt)
+        needrot <- atomically $ do
+            n <- takeTMVar (numRandomSalts ratelimiter)
+            if n > bloomMaxSize `div` 2
+                then return (Just n)
+                else do
+                    putTMVar (numRandomSalts ratelimiter) (n+1)
+                    return Nothing
+        handlerotation needrot
+        return $ NeedProofOfWork $ mkreq salt
+  where
+    handlerotation Nothing = return ()
+    handlerotation (Just n) = do
+        hPutStrLn stderr $ "rotating bloom filters after processing " ++ show n ++ " requests"
+        newassigned <- mkBloomFilter
+        newused <- mkBloomFilter
+        atomically $ do
+            oldassigned <- takeTMVar (assignedRandomSalts ratelimiter)
+            oldused <- takeTMVar (usedRandomSalts ratelimiter)
+            _ <- swapTMVar (assignedRandomSaltsOld ratelimiter) oldassigned
+            _ <- swapTMVar (usedRandomSaltsOld ratelimiter) oldused
+            putTMVar (assignedRandomSalts ratelimiter) =<< takeTMVar newassigned
+            putTMVar (usedRandomSalts ratelimiter) =<< takeTMVar newused
+            putTMVar (numRandomSalts ratelimiter) 0
+
+withBloomFilter
+    :: RateLimiter
+    -> (RateLimiter -> BloomFilter)
+    -> (BloomFilter.MBloom RealWorld RandomSalt -> ST RealWorld a)
+    -> IO a
+withBloomFilter rl field a = do
+    b <- atomically $ readTMVar (field rl)
+    stToIO (a b)

diff --git a/HTTP/Server.hs b/HTTP/Server.hs
index ab27aaa..dd35d1c 100644
--- a/HTTP/Server.hs
+++ b/HTTP/Server.hs
@@ -9,6 +9,7 @@ module HTTP.Server (runServer) where
 
 import HTTP
 import HTTP.ProofOfWork
+import HTTP.RateLimit
 import Types
 import Types.Storage
 import Tunables
@@ -26,12 +27,14 @@ import qualified Data.ByteString as B
 data ServerState = ServerState
    { obscurerRequest :: TMVar ()
    , storageDirectory :: Maybe LocalStorageDirectory
+    , rateLimiter :: RateLimiter
    }
 
 newServerState :: Maybe LocalStorageDirectory -> IO ServerState
 newServerState d = ServerState
    <$> newEmptyTMVarIO
    <*> pure d
+    <*> newRateLimiter
 
 runServer :: Maybe LocalStorageDirectory -> String -> Port -> IO ()
 runServer d bindaddress port = do
@@ -60,30 +63,30 @@ server st = motd
 motd :: Handler Motd
 motd = return $ Motd "Hello World!"
 
-getObject :: ServerState -> StorableObjectIdent -> Maybe ProofOfWork -> Handler (ProofOfWorkRequirement StorableObject)
-getObject st i _pow = do
+getObject :: ServerState -> StorableObjectIdent -> Maybe ProofOfWork -> Handler (POWGuarded StorableObject)
+getObject st i pow = rateLimit (rateLimiter st) pow i $ do
    r <- liftIO $ retrieveShare (serverStorage st) dummyShareNum i
    liftIO $ requestObscure st
    case r of
-        RetrieveSuccess (Share _n o) -> return $ Result o
+        RetrieveSuccess (Share _n o) -> return o
        RetrieveFailure _ -> throwError err404
 
-putObject :: ServerState -> StorableObjectIdent -> Maybe ProofOfWork -> StorableObject -> Handler (ProofOfWorkRequirement StoreResult)
-putObject st i _pow o = do
+putObject :: ServerState -> StorableObjectIdent -> Maybe ProofOfWork -> StorableObject -> Handler (POWGuarded StoreResult)
+putObject st i pow o = rateLimit (rateLimiter st) pow i $ do
    if validObjectsize o
        then do
            r <- liftIO $ storeShare (serverStorage st) i (Share dummyShareNum o)
            liftIO $ requestObscure st
-            return $ Result r
-        else return $ Result $ StoreFailure "invalid object size"
+            return r
+        else return $ StoreFailure "invalid object size"
 
 validObjectsize :: StorableObject -> Bool
 validObjectsize o = any (sz ==) knownObjectSizes
  where
    sz = B.length (fromStorableObject o)
 
-countObjects :: ServerState -> Maybe ProofOfWork -> Handler (ProofOfWorkRequirement CountResult)
-countObjects st _pow = liftIO $ Result <$> countShares (serverStorage st)
+countObjects :: ServerState -> Handler CountResult
+countObjects = liftIO . countShares . serverStorage
 
 -- | 1 is a dummy value; the server does not know the actual share numbers.
 dummyShareNum :: ShareNum

diff --git a/Storage/Network.hs b/Storage/Network.hs
index 24f1c7d..6053ff3 100644
--- a/Storage/Network.hs
+++ b/Storage/Network.hs
@@ -14,8 +14,10 @@ module Storage.Network (
 
 import Types
 import Types.Storage
+import Types.Cost
 import Data.List
 import Data.Char
+import HTTP
 import HTTP.Client
 import HTTP.ProofOfWork
 import Servant.Client
@@ -58,12 +60,12 @@ networkStorage server = Storage
 
 store :: Server -> StorableObjectIdent -> Share -> IO StoreResult
 store srv i (Share _n o) =
-    serverRequest srv StoreFailure id $ \pow ->
+    serverRequest srv StoreFailure id i $ \pow ->
        putObject i pow o
 
 retrieve :: Server -> ShareNum -> StorableObjectIdent -> IO RetrieveResult
 retrieve srv n i =
-    serverRequest srv RetrieveFailure (RetrieveSuccess . Share n) $
+    serverRequest srv RetrieveFailure (RetrieveSuccess . Share n) i $
        getObject i
 
 -- | Servers should automatically obscure, so do nothing.
@@ -72,7 +74,7 @@ obscure :: Server -> IO ObscureResult
 obscure _ = return ObscureSuccess
 
 count :: Server -> IO CountResult
-count srv = serverRequest srv CountFailure id countObjects
+count srv = either CountFailure id <$> serverRequest' srv countObjects
 
 -- | Not needed for servers.
 move :: Server -> Storage -> IO ()
@@ -82,23 +84,39 @@ serverRequest
    :: Server
    -> (String -> a)
    -> (r -> a)
-    -> (Maybe ProofOfWork -> Manager -> BaseUrl -> ExceptT ServantError IO (ProofOfWorkRequirement r))
+    -> StorableObjectIdent
+    -> (Maybe ProofOfWork -> Manager -> BaseUrl -> ExceptT ServantError IO (POWGuarded r))
    -> IO a
-serverRequest srv onerr onsuccess a =
+serverRequest srv onerr onsuccess i a =
+    go Nothing maxProofOfWork
+  where
+    go pow (Seconds timeleft)
+        | timeleft <= 0 = return $ onerr "server asked for too much proof of work; gave up"
+        | otherwise = do
+            res <- serverRequest' srv (a pow)
+            case res of
+                Left err -> return $ onerr err
+                Right (Result r) -> return $ onsuccess r
+                Right (NeedProofOfWork req) -> go
+                    (Just $ genProofOfWork req i)
+                    (Seconds timeleft - generationTime req)
+
+serverRequest'
+    :: Server
+    -> (Manager -> BaseUrl -> ExceptT ServantError IO r)
+    -> IO (Either String r)
+serverRequest' srv a = do
    -- A new Manager is allocated for each request, rather than reusing
    -- any connection. This is a feature; it makes correlation attacks
    -- harder because the server can't tell if two connections
-    -- (over tor) came from the same user.
-    go Nothing =<< torableManager
+    -- accessing different objects came from the same user, except by
+    -- comparing IP addresses (which are masked somewhat by using tor).
+    manager <- torableManager
+    res <- runExceptT $ a manager url
+    return $ case res of
+        Left err -> Left $ "server failure: " ++ show err
+        Right r -> Right r
  where
    url = serverUrl srv
-    go pow manager = do
-        res <- runExceptT $ a pow manager url
-        case res of
-            Left err -> return $ onerr $
-                "server failure: " ++ show err
-            Right (Result r) -> return $ onsuccess r
-            Right needpow -> error "NEEDPOW" -- loop with pow
 
 -- | HTTP Manager supporting tor .onion and regular hosts
 torableManager :: IO Manager

diff --git a/TODO b/TODO
index 1365efa..f35f882 100644
--- a/TODO
+++ b/TODO
@@ -1,6 +1,6 @@
 Soon:
 
-* client/server Proof Of Work
+* test client/server Proof Of Work
 * Add some random padding to http requests and responses, to make
   it harder for traffic analysis to tell that it's keysafe traffic.
 * Implement the different categories of servers in the server list.

diff --git a/Tunables.hs b/Tunables.hs
index d7e5ac7..1d087bf 100644
--- a/Tunables.hs
+++ b/Tunables.hs
@@ -9,6 +9,7 @@ module Tunables where
 
 import Cost
 import qualified Crypto.Argon2 as Argon2
+import Data.Word
 
 -- | To determine the tunables used for a key name the expensive hash of the
 -- name is calculated, using a particular configuration, and if the
@@ -139,3 +140,19 @@ knownObjectSizes :: [Int]
 knownObjectSizes = map (calc . snd) knownTunings
  where
    calc t = objectSize t * shareOverhead t
+
+-- Hash for client-server Proof Of Work. This is tuned to take around
+-- 4 seconds to calculate the hash on a 4 core machine, with 0 added
+-- iterations. Adding more iterations will increase that somewhat.
+--
+-- This is not included in Tunables because it doesn't affect object
+-- encryption and storage.
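+--
+-- For example, addedArgon2Iterations 20 doubles hashIterations to 40,
+-- and the estimated cost below becomes 4 + (4 * 20) `div` 20 = 8 seconds.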
+proofOfWorkHashTunable :: Word32 -> ExpensiveHashTunable
+proofOfWorkHashTunable addits =
+    UseArgon2 (CPUCost (Seconds (4 + (4 * fromIntegral addits `div` 20))) (Divisibility 4)) $
+        Argon2.HashOptions
+            { Argon2.hashIterations = 20 + addits
+            , Argon2.hashMemory = 131072 -- 128 mebibytes per thread
+            , Argon2.hashParallelism = 4
+            , Argon2.hashVariant = Argon2.Argon2i
+            }

diff --git a/keysafe.cabal b/keysafe.cabal
index 9b75141..b13ee88 100644
--- a/keysafe.cabal
+++ b/keysafe.cabal
@@ -60,10 +60,14 @@ Executable keysafe
    , stm == 2.4.*
    , socks == 0.5.*
    , network == 2.6.*
+    , token-bucket == 0.1.*
+    , bloomfilter == 2.0.*
  -- Temporarily inlined due to https://github.com/ocharles/argon2/issues/3
  -- argon2 == 1.1.*
  Extra-Libraries: argon2
  Other-Modules:
+    Benchmark
+    ByteStrings
    Crypto.Argon2.FFI
    Crypto.Argon2
    CmdLine
@@ -76,6 +80,7 @@ Executable keysafe
    HTTP.Client
    HTTP.ProofOfWork
    HTTP.Server
+    HTTP.RateLimit
    SecretKey
    Serialization
    Share

diff --git a/keysafe.hs b/keysafe.hs
index 569e678..98e1ecb 100644
--- a/keysafe.hs
+++ b/keysafe.hs
@@ -13,7 +13,7 @@ import qualified CmdLine
 import UI
 import Encryption
 import Entropy
-import ExpensiveHash
+import Benchmark
 import Tests
 import Cost
 import SecretKey

diff --git a/stack.yaml b/stack.yaml
index 82ac935..639d7e1 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -11,4 +11,5 @@ extra-deps:
 - servant-0.7.1
 - servant-server-0.7.1
 - servant-client-0.7.1
+- token-bucket-0.1.0.1
 explicit-setup-deps:
-- 
cgit v1.2.3