From 13c408d2295597540f0b2dfb6f7b86e739876c90 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Mon, 12 Sep 2016 22:35:47 -0400
Subject: implement client-server Proof Of Work

Mashed up an argon2-based PoW with token buckets and bloom filters.

This is intended to prevent a few abuses including:

* Using a keysafe server for general file storage, by storing a whole
  lot of chunks.
* An attacker guessing names that people will use, and uploading junk
  to keysafe servers under those names, to make it harder for others
  to use keysafe later.
* An attacker trying to guess the names used for objects on keysafe
  servers in order to download them and start password cracking.
  (As a second level of defense, since the name generation hash is
  expensive already.)

Completely untested, but it builds!

This commit was sponsored by Andreas on Patreon.
---
 Benchmark.hs        |  82 +++++++++++++++++++++
 ByteStrings.hs      |  30 ++++++++
 Encryption.hs       |  31 ++------
 ExpensiveHash.hs    |  61 ----------------
 HTTP.hs             |  39 +++++++---
 HTTP/Client.hs      |   6 +-
 HTTP/ProofOfWork.hs | 116 +++++++++++++++++++++++++++---
 HTTP/RateLimit.hs   | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 HTTP/Server.hs      |  21 +++---
 Storage/Network.hs  |  46 ++++++++----
 TODO                |   2 +-
 Tunables.hs         |  17 +++++
 keysafe.cabal       |   5 ++
 keysafe.hs          |   2 +-
 stack.yaml          |   1 +
 15 files changed, 528 insertions(+), 134 deletions(-)
 create mode 100644 Benchmark.hs
 create mode 100644 ByteStrings.hs
 create mode 100644 HTTP/RateLimit.hs

diff --git a/Benchmark.hs b/Benchmark.hs
new file mode 100644
index 0000000..21b7ce3
--- /dev/null
+++ b/Benchmark.hs
@@ -0,0 +1,82 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+{- Copyright 2016 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+module Benchmark where
+
+import Types
+import Tunables
+import ExpensiveHash
+import Cost
+import Serialization ()
+import qualified Data.ByteString.UTF8 as BU8
+import qualified Crypto.Argon2 as Argon2
+import Data.Time.Clock
+import Control.DeepSeq
+import Control.Monad
+import Data.Monoid
+import Data.Maybe
+
+benchmarkExpensiveHash :: Int -> ExpensiveHashTunable -> IO (BenchmarkResult (Cost CreationOp))
+benchmarkExpensiveHash rounds tunables =
+    benchmarkExpensiveHash' rounds tunables (getexpected tunables)
+  where
+    getexpected (UseArgon2 cost _) = mapCost (* fromIntegral rounds) cost
+
+benchmarkExpensiveHash' :: Int -> ExpensiveHashTunable -> Cost op -> IO (BenchmarkResult (Cost op))
+benchmarkExpensiveHash' rounds tunables@(UseArgon2 _ hashopts) expected = do
+    numcores <- fromIntegral . fromMaybe (error "Unknown number of physical cores.")
+        <$> getNumCores
+    start <- getCurrentTime
+    forM_ [1..rounds] $ \n -> do
+        -- Must vary the data being hashed to avoid laziness
+        -- caching hash results.
+        let base = BU8.fromString (show n)
+        let ExpensiveHash _ t = expensiveHash tunables
+            (Salt (GpgKey (KeyId (base <> "dummy"))))
+            (base <> "himom")
+        t `deepseq` return ()
+    end <- getCurrentTime
+    let diff = floor $ end `diffUTCTime` start
+    let maxthreads = Argon2.hashParallelism hashopts
+    let actual = CPUCost (Seconds diff) (Divisibility $ fromIntegral maxthreads)
+    -- The expected cost is for a single core, so adjust it
+    -- based on the number of cores, up to a maximum of the number
+    -- of threads that the hash is configured to use.
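+    -- (For example, with hashParallelism 4 on a machine with only
+    -- 2 physical cores, usedcores is 2, so the single-core expected
+    -- cost is divided by 2 rather than by 4.)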
+    let usedcores = min maxthreads numcores
+    let adjustedexpected = mapCost (`div` fromIntegral usedcores) expected
+    return $ BenchmarkResult
+        { expectedBenchmark = adjustedexpected
+        , actualBenchmark = actual
+        }
+
+benchmarkTunables :: Tunables -> IO ()
+benchmarkTunables tunables = do
+    putStrLn "/proc/cpuinfo:"
+    putStrLn =<< readFile "/proc/cpuinfo"
+
+    putStrLn $ "Benchmarking 16 rounds of proof of work hash..."
+    print =<< benchmarkExpensiveHash 16 (proofOfWorkHashTunable 0)
+
+    -- Rather than run all 256 rounds of this hash, which would
+    -- probably take on the order of 1 hour, run only 16, and scale
+    -- the expected cost accordingly.
+    let normalrounds = fromIntegral $
+        256 * randomSaltBytes (keyEncryptionKeyTunable tunables)
+    putStrLn $ "Benchmarking 16/" ++ show normalrounds ++ " rounds of key encryption key hash..."
+    r <- benchmarkExpensiveHash' 16
+        (keyEncryptionKeyHash $ keyEncryptionKeyTunable tunables)
+        (mapCost (`div` (normalrounds `div` 16)) $ randomSaltBytesBruteForceCost $ keyEncryptionKeyTunable tunables)
+    print r
+    putStrLn $ "Estimated time for " ++ show normalrounds ++ " rounds of key encryption key hash..."
+    print $ BenchmarkResult
+        { expectedBenchmark = mapCost (* 16) (expectedBenchmark r)
+        , actualBenchmark = mapCost (* 16) (actualBenchmark r)
+        }
+
+    putStrLn "Benchmarking 1 round of name generation hash..."
+    print =<< benchmarkExpensiveHash 1
+        (nameGenerationHash $ nameGenerationTunable tunables)

diff --git a/ByteStrings.hs b/ByteStrings.hs
new file mode 100644
index 0000000..02e22ab
--- /dev/null
+++ b/ByteStrings.hs
@@ -0,0 +1,30 @@
+{- Copyright 2016 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+module ByteStrings where
+
+import qualified Data.ByteString as B
+
+allByteStringsOfLength :: Int -> [B.ByteString]
+allByteStringsOfLength = go []
+  where
+    go ws n
+        | n == 0 = return (B.pack ws)
+        | otherwise = do
+            w <- [0..255]
+            go (w:ws) (n-1)
+
+-- | Contains every possible byte string, with shorter ones first.
+allByteStrings :: [B.ByteString]
+allByteStrings = concatMap allByteStringsOfLength [1..]
+
+chunkByteString :: Int -> B.ByteString -> [B.ByteString]
+chunkByteString n = go []
+  where
+    go cs b
+        | B.length b <= n = reverse (b:cs)
+        | otherwise =
+            let (h, t) = B.splitAt n b
+            in go (h:cs) t

diff --git a/Encryption.hs b/Encryption.hs
index b084c27..12edbc6 100644
--- a/Encryption.hs
+++ b/Encryption.hs
@@ -12,9 +12,11 @@ import Types
 import Tunables
 import Cost
 import ExpensiveHash
+import ByteStrings
 import Data.Monoid
 import Data.Maybe
 import Data.Word
+import Control.Monad
 import qualified Raaz
 import qualified Raaz.Cipher.AES as Raaz
 import qualified Raaz.Cipher.Internal as Raaz
@@ -31,7 +33,7 @@ cipher = Raaz.aes256cbc
 
 encrypt :: Tunables -> KeyEncryptionKey -> SecretKey -> EncryptedSecretKey
 encrypt tunables kek (SecretKey secret) =
-    EncryptedSecretKey (chunk (objectSize tunables) b) (keyBruteForceCalc kek)
+    EncryptedSecretKey (chunkByteString (objectSize tunables) b) (keyBruteForceCalc kek)
   where
    -- Raaz does not seem to provide a high-level interface
    -- for AES encryption, so use unsafeEncrypt. The use of
@@ -138,24 +140,6 @@ candidateKeyEncryptionKeys tunables name password =
    saltprefixes = allByteStringsOfLength $
        randomSaltBytes $ keyEncryptionKeyTunable tunables
 
-allByteStringsOfLength :: Int -> [B.ByteString]
-allByteStringsOfLength = go []
-  where
-    go ws n
-        | n == 0 = return (B.pack ws)
-        | otherwise = do
-            w <- [0..255]
-            go (w:ws) (n-1)
-
-chunk :: Int -> B.ByteString -> [B.ByteString]
-chunk n = go []
-  where
-    go cs b
-        | B.length b <= n = reverse (b:cs)
-        | otherwise =
-            let (h, t) = B.splitAt n b
-            in go (h:cs) t
-
 -- Use the sha256 of the name (truncated) as the IV.
 genIV :: Name -> Raaz.IV
 genIV (Name name) =
@@ -168,13 +152,10 @@ genIV (Name name) =
 type SaltPrefix = B.ByteString
 
 genRandomSaltPrefix :: Raaz.SystemPRG -> Tunables -> IO SaltPrefix
-genRandomSaltPrefix prg tunables = go []
-    (randomSaltBytes $ keyEncryptionKeyTunable tunables)
+genRandomSaltPrefix prg tunables = B.pack <$> replicateM n randbyte
   where
-    go ws 0 = return (B.pack ws)
-    go ws n = do
-        b <- Raaz.random prg :: IO Word8
-        go (b:ws) (n-1)
+    n = randomSaltBytes $ keyEncryptionKeyTunable tunables
+    randbyte = Raaz.random prg :: IO Word8
 
 instance Raaz.Random Word8

diff --git a/ExpensiveHash.hs b/ExpensiveHash.hs
index b46b23c..6fab15c 100644
--- a/ExpensiveHash.hs
+++ b/ExpensiveHash.hs
@@ -7,20 +7,14 @@
 module ExpensiveHash where
 
-import Types
 import Tunables
 import Cost
 import Serialization ()
 import qualified Data.Text as T
 import qualified Data.ByteString as B
-import qualified Data.ByteString.UTF8 as BU8
 import qualified Crypto.Argon2 as Argon2
 import Raaz.Core.Encode
-import Data.Time.Clock
-import Control.DeepSeq
-import Control.Monad
 import Data.Monoid
-import Data.Maybe
 
 -- | A hash that is expensive to calculate.
 --
@@ -46,58 +40,3 @@ expensiveHash (UseArgon2 cost opts) (Salt s) b = ExpensiveHash cost $
    argonsalt =
        let sb = toByteString s
        in sb <> B.replicate (8 - B.length sb ) 32
-
-benchmarkExpensiveHash :: Int -> ExpensiveHashTunable -> Cost op -> IO (BenchmarkResult (Cost op))
-benchmarkExpensiveHash rounds tunables@(UseArgon2 _ hashopts) expected = do
-    numcores <- fromIntegral . fromMaybe (error "Unknown number of physical cores.")
-        <$> getNumCores
-    start <- getCurrentTime
-    forM_ [1..rounds] $ \n -> do
-        -- Must vary the data being hashed to avoid laziness
-        -- caching hash results.
-        let base = BU8.fromString (show n)
-        let ExpensiveHash _ t = expensiveHash tunables
-            (Salt (GpgKey (KeyId (base <> "dummy"))))
-            (base <> "himom")
-        t `deepseq` return ()
-    end <- getCurrentTime
-    let diff = floor $ end `diffUTCTime` start
-    let maxthreads = Argon2.hashParallelism hashopts
-    let actual = CPUCost (Seconds diff) (Divisibility $ fromIntegral maxthreads)
-    -- The expected cost is for a single core, so adjust it
-    -- based on the number of cores, up to a maximum of the number
-    -- of threads that the hash is configred to use.
-    let usedcores = min maxthreads numcores
-    let adjustedexpected = mapCost (`div` fromIntegral usedcores) expected
-    return $ BenchmarkResult
-        { expectedBenchmark = adjustedexpected
-        , actualBenchmark = actual
-        }
-
-benchmarkTunables :: Tunables -> IO ()
-benchmarkTunables tunables = do
-    putStrLn "/proc/cpuinfo:"
-    putStrLn =<< readFile "/proc/cpuinfo"
-
-    -- Rather than run all 256 rounds of this hash, which would
-    -- probably take on the order of 1 hour, run only 16, and scale
-    -- the expected cost accordingly.
-    let normalrounds = fromIntegral $
-        256 * randomSaltBytes (keyEncryptionKeyTunable tunables)
-    putStrLn $ "Benchmarking 16/" ++ show normalrounds ++ " rounds of key encryption key hash..."
-    r <- benchmarkExpensiveHash 16
-        (keyEncryptionKeyHash $ keyEncryptionKeyTunable tunables)
-        (mapCost (`div` (normalrounds `div` 16)) $ randomSaltBytesBruteForceCost $ keyEncryptionKeyTunable tunables)
-    print r
-    putStrLn $ "Estimated time for " ++ show normalrounds ++ " rounds of key encryption key hash..."
-    print $ BenchmarkResult
-        { expectedBenchmark = mapCost (* 16) (expectedBenchmark r)
-        , actualBenchmark = mapCost (* 16) (actualBenchmark r)
-        }
-
-    putStrLn "Benchmarking 1 round of name generation hash..."
-    print =<< benchmarkExpensiveHash 1
-        (nameGenerationHash $ nameGenerationTunable tunables)
-        (getexpected $ nameGenerationHash $ nameGenerationTunable tunables)
-  where
-    getexpected (UseArgon2 cost _) = cost

diff --git a/HTTP.hs b/HTTP.hs
index db9ef4d..702a806 100644
--- a/HTTP.hs
+++ b/HTTP.hs
@@ -20,40 +20,47 @@ import Servant.API
 import Data.Text
 import Data.Aeson.Types
 import GHC.Generics hiding (V1)
+import qualified Data.Text as T
 import qualified Data.Text.Encoding as T
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as L
 import qualified Raaz.Core.Encode as Raaz
+import Data.Monoid
+import Prelude
 
 -- | Keysafe's http API
 type HttpAPI =
    "keysafe" :> V1 :> "motd" :> Get '[JSON] Motd
    :<|> "keysafe" :> V1 :> "objects" :> ObjectIdent :> POWParam
-        :> Get '[JSON] (ProofOfWorkRequirement StorableObject)
+        :> Get '[JSON] (POWGuarded StorableObject)
    :<|> "keysafe" :> V1 :> "objects" :> ObjectIdent :> POWParam
        :> ReqBody '[OctetStream] StorableObject
-        :> Put '[JSON] (ProofOfWorkRequirement StoreResult)
-    :<|> "keysafe" :> V1 :> "stats" :> "countobjects" :> POWParam
-        :> Get '[JSON] (ProofOfWorkRequirement CountResult)
+        :> Put '[JSON] (POWGuarded StoreResult)
+    :<|> "keysafe" :> V1 :> "stats" :> "countobjects"
+        :> Get '[JSON] CountResult
 
 type V1 = "v1"
 
 newtype Motd = Motd Text
    deriving (Generic)
 
+data POWGuarded t
+    = Result t
+    | NeedProofOfWork ProofOfWorkRequirement
+    deriving (Generic)
+
 type POWParam = QueryParam "proofofwork" ProofOfWork
 type ObjectIdent = Capture "ident" StorableObjectIdent
 
 instance ToJSON Motd
 instance FromJSON Motd
-instance ToJSON t => ToJSON (ProofOfWorkRequirement t)
-instance FromJSON t => FromJSON (ProofOfWorkRequirement t)
-
-instance FromHttpApiData ProofOfWork where
-    parseUrlPiece = Right . ProofOfWork
-instance ToHttpApiData ProofOfWork where
-    toUrlPiece (ProofOfWork t) = t
+instance ToJSON t => ToJSON (POWGuarded t)
+instance FromJSON t => FromJSON (POWGuarded t)
+instance ToJSON ProofOfWorkRequirement
+instance FromJSON ProofOfWorkRequirement
+instance ToJSON RandomSalt
+instance FromJSON RandomSalt
 
 -- StorableObjectIdent contains a hash, which is valid UTF-8.
 instance ToHttpApiData StorableObjectIdent where
@@ -75,6 +82,16 @@ instance FromJSON StorableObject where
    parseJSON (Object v) = StorableObject <$> (unb64 =<< v .: "data")
    parseJSON invalid = typeMismatch "StorableObject" invalid
 
+-- ProofOfWork contains an arbitrary bytestring and is base64 encoded in
+-- the query string.
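+-- For example (hypothetical values), ProofOfWork "\SOH\STX" (RandomSalt "42")
+-- would appear in the query string as "42:AQI=".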
+instance ToHttpApiData ProofOfWork where
+    toUrlPiece (ProofOfWork b (RandomSalt s)) = s <> ":" <> b64 b
+instance FromHttpApiData ProofOfWork where
+    parseUrlPiece t = do
+        let (s, rest) = T.break (== ':') t
+        b <- unb64 (T.drop 1 rest)
+        return (ProofOfWork b (RandomSalt s))
+
 b64 :: B.ByteString -> Text
 b64 v = T.decodeUtf8 $ Raaz.toByteString (Raaz.encode v :: Raaz.Base64)

diff --git a/HTTP/Client.hs b/HTTP/Client.hs
index 0c28005..8f81db4 100644
--- a/HTTP/Client.hs
+++ b/HTTP/Client.hs
@@ -18,7 +18,7 @@ httpAPI :: Proxy HttpAPI
 httpAPI = Proxy
 
 motd :: Manager -> BaseUrl -> ClientM Motd
-getObject :: StorableObjectIdent -> Maybe ProofOfWork -> Manager -> BaseUrl -> ClientM (ProofOfWorkRequirement StorableObject)
-putObject :: StorableObjectIdent -> Maybe ProofOfWork -> StorableObject -> Manager -> BaseUrl -> ClientM (ProofOfWorkRequirement StoreResult)
-countObjects :: Maybe ProofOfWork -> Manager -> BaseUrl -> ClientM (ProofOfWorkRequirement CountResult)
+getObject :: StorableObjectIdent -> Maybe ProofOfWork -> Manager -> BaseUrl -> ClientM (POWGuarded StorableObject)
+putObject :: StorableObjectIdent -> Maybe ProofOfWork -> StorableObject -> Manager -> BaseUrl -> ClientM (POWGuarded StoreResult)
+countObjects :: Manager -> BaseUrl -> ClientM CountResult
 
 motd :<|> getObject :<|> putObject :<|> countObjects = client httpAPI

diff --git a/HTTP/ProofOfWork.hs b/HTTP/ProofOfWork.hs
index 45cc96d..04aec57 100644
--- a/HTTP/ProofOfWork.hs
+++ b/HTTP/ProofOfWork.hs
@@ -4,19 +4,117 @@
 -}
 
 {-# LANGUAGE DeriveGeneric #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
 
 module HTTP.ProofOfWork where
 
 import Types
-import qualified Data.Text as T
+import Types.Cost
+import ExpensiveHash
+import Tunables
+import ByteStrings
 import GHC.Generics
+import qualified Data.Text as T
+import qualified Data.ByteString as B
+import Data.Text.Encoding (encodeUtf8)
+import Raaz.Core.Encode
+import qualified Raaz
+import Data.BloomFilter.Hash
+import Control.Monad
+import Data.Word
+import Data.Bits
+import Data.Monoid
+import Prelude
+
+-- | A value that the client has to do some work to calculate.
+data ProofOfWork = ProofOfWork B.ByteString RandomSalt
+    deriving (Show)
+
+data ProofOfWorkRequirement = ProofOfWorkRequirement
+    { leadingZeros :: Int
+    , addedArgon2Iterations :: Word32
+    , randomSalt :: RandomSalt
+    }
+    deriving (Generic, Show)
+
+-- | Using Text and not ByteString so that ProofOfWorkRequirement can have a
+-- JSON instance.
+newtype RandomSalt = RandomSalt { fromRandomSalt :: T.Text }
+    deriving (Generic, Show, Eq)
+
+instance Hashable RandomSalt where
+    hashIO32 = hashIO32 . encodeUtf8 . fromRandomSalt
+    hashIO64 = hashIO64 . encodeUtf8 . fromRandomSalt
+
+-- | Servers should never demand a proof of work that takes longer than
+-- this to generate. Note that if a server changes its mind and doubles
+-- the proof of work, a client counts that cumulatively. So, a server
+-- should avoid any single proof of work requirement taking more than half
+-- this long.
+maxProofOfWork :: Seconds
+maxProofOfWork = Seconds (16*60)
+
+-- | How long it will take to generate a proof of work meeting the
+-- requirement, maximum.
+--
+-- Of course, a client can get lucky and find a value that works
+-- on the very first try. On average, the client will need to work for half
+-- as long as the returned number of Seconds.
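+--
+-- For example, with the base proof of work hash tuned to 4 seconds and
+-- leadingZeros = 4, this is 2^4 * 4 = 64 seconds, for an average of
+-- 32 seconds of work.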
+generationTime :: ProofOfWorkRequirement -> Seconds
+generationTime req =
+    let UseArgon2 (CPUCost (Seconds s) _) _ = proofOfWorkHashTunable (addedArgon2Iterations req)
+    in Seconds ((2^(leadingZeros req)) * s)
+
+mkProofOfWorkRequirement :: Seconds -> Maybe (RandomSalt -> ProofOfWorkRequirement)
+mkProofOfWorkRequirement (Seconds n)
+    | lz < 1 = Nothing
+    | otherwise = Just $ ProofOfWorkRequirement lz its
+  where
+    lz = floor (logBase 2 (max 1 (fromIntegral n / fromIntegral s)) :: Double)
+    UseArgon2 (CPUCost (Seconds s) _) _ = proofOfWorkHashTunable its
+    its = 0
+
+mkRandomSalt :: IO RandomSalt
+mkRandomSalt = do
+    prg <- Raaz.newPRG () :: IO Raaz.SystemPRG
+    rs <- replicateM 16 (Raaz.random prg :: IO Word8)
+    return $ RandomSalt $ T.pack $ concatMap show rs
+
+instance Raaz.Random Word8
+
+isValidProofOfWork :: ProofOfWork -> ProofOfWorkRequirement -> StorableObjectIdent -> Bool
+isValidProofOfWork (ProofOfWork pow rsalt) req (StorableObjectIdent n) =
+    samesalts && enoughzeros
+  where
+    samesalts = rsalt == randomSalt req
+    enoughzeros = all (== False) (take (leadingZeros req) (setBits b))
+    tunable = proofOfWorkHashTunable (addedArgon2Iterations req)
+    salt = Salt $ POWSalt $
+        encodeUtf8 (fromRandomSalt (randomSalt req)) <> pow
+    ExpensiveHash _ hash = expensiveHash tunable salt n
+    -- Since expensiveHash generates an ascii encoded hash that
+    -- includes the parameters, take the sha256 of it to get the
+    -- bytestring that is checked for the necessary number
+    -- of leading 0 bits.
+    b = Raaz.toByteString $ Raaz.sha256 $ encodeUtf8 hash
+
+setBits :: B.ByteString -> [Bool]
+setBits = concatMap go . B.unpack
+  where
+    go byte = map (uncurry testBit) (zip (repeat byte) [0..7])
+
+newtype POWSalt = POWSalt B.ByteString
 
-data ProofOfWorkRequirement t
-    = Result t
-    | ProofOfWorkRequirement
-        { leadingZeros :: Int
-        , argon2Iterations :: Int
-        }
-    deriving (Generic)
+instance Encodable POWSalt where
+    toByteString (POWSalt n) = n
+    fromByteString = Just . POWSalt
 
-newtype ProofOfWork = ProofOfWork T.Text
+genProofOfWork :: ProofOfWorkRequirement -> StorableObjectIdent -> ProofOfWork
+genProofOfWork req i = go allByteStrings
+  where
+    go [] = error "failed to generate Proof Of Work. This should be impossible!"
+    go (b:bs)
+        | isValidProofOfWork candidate req i = candidate
+        | otherwise = go bs
+      where
+        candidate = ProofOfWork b (randomSalt req)

diff --git a/HTTP/RateLimit.hs b/HTTP/RateLimit.hs
new file mode 100644
index 0000000..9153664
--- /dev/null
+++ b/HTTP/RateLimit.hs
@@ -0,0 +1,203 @@
+{- Copyright 2016 Joey Hess
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+module HTTP.RateLimit where
+
+import Types
+import Types.Cost
+import HTTP
+import HTTP.ProofOfWork
+import Servant
+import Control.Concurrent
+import Control.Concurrent.STM
+import Control.Concurrent.TokenBucket
+import qualified Data.BloomFilter.Mutable as BloomFilter
+import qualified Data.BloomFilter.Hash as BloomFilter
+import Data.BloomFilter.Easy (suggestSizing)
+import Control.Monad.ST
+import System.IO
+import Data.Maybe
+import Data.Word
+import Control.Monad.IO.Class
+
+-- | A rate limiter is a series of buckets. Each bucket has a
+-- successively more difficult proof of work access requirement.
+--
+-- To guard against DOS attacks that reuse the same proof of work,
+-- RandomSalt values are used, and bloom filters keep track of
+-- the ones that have been assigned and used.
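+--
+-- Two generations of the bloom filters are kept; on rotation the
+-- current filters become the old generation, so salts from before
+-- the rotation are still remembered, and replays of them are rejected.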
+data RateLimiter = RateLimiter
+    { buckets :: TMVar [Bucket]
+    , assignedRandomSalts :: BloomFilter
+    , assignedRandomSaltsOld :: BloomFilter
+    , usedRandomSalts :: BloomFilter
+    , usedRandomSaltsOld :: BloomFilter
+    , numRandomSalts :: TMVar Int
+    , accessLock :: TMVar ()
+    }
+
+type BloomFilter = TMVar (BloomFilter.MBloom RealWorld RandomSalt)
+
+-- | Buckets fill up at a fixed rate (which can be tuned by the server
+-- operator), and accessing a bucket removes one unit from it.
+data Bucket = Bucket
+    { tokenBucket :: TokenBucket
+    , mkProofReq :: Maybe (RandomSalt -> ProofOfWorkRequirement)
+    }
+
+newRateLimiter :: IO RateLimiter
+newRateLimiter = RateLimiter
+    <$> (newTMVarIO =<< mkbuckets maxProofOfWork [])
+    <*> mkBloomFilter
+    <*> mkBloomFilter
+    <*> mkBloomFilter
+    <*> mkBloomFilter
+    <*> newTMVarIO 0
+    <*> newTMVarIO ()
+  where
+    -- The last bucket takes half of maxProofOfWork to access, and
+    -- each earlier bucket halves that time, down to the first bucket,
+    -- which needs no proof of work. This ensures that in the edge case
+    -- where a client keeps getting bumped up to more and more expensive
+    -- buckets, it doesn't need to do more than maxProofOfWork total work.
+    mkbuckets (Seconds n) bs
+        | n <= 0 = return bs
+        | otherwise = do
+            let s = Seconds (n `div` 2)
+            let mkreq = mkProofOfWorkRequirement s
+            b <- Bucket
+                <$> newTokenBucket
+                <*> pure mkreq
+            case mkreq of
+                Nothing -> return (b:bs)
+                Just _ -> mkbuckets s (b:bs)
+
+mkBloomFilter :: IO BloomFilter
+mkBloomFilter = do
+    b <- stToIO $ BloomFilter.new (BloomFilter.cheapHashes bloomhashes) bloomsize
+    newTMVarIO b
+  where
+    -- Size the bloom filter to hold 1 million items, with a false
+    -- positive rate of 1 in 100 thousand. This will use around 32 mb
+    -- of memory.
+    (bloomhashes, bloomsize) = suggestSizing bloomMaxSize (1/100000)
+
+-- | Maximum number of RandomSalts that can be stored in a bloom filter
+-- without the false positive rate getting bad.
+bloomMaxSize :: Int
+bloomMaxSize = 1000000
+
+-- | Size of the bucket. This allows a burst of accesses after an idle
+-- period, which is especially useful when retrieving keys that were
+-- split into multiple chunks. However, setting this too high lets clients
+-- cheaply store lots of data, so keep the objectSize in mind.
+burstSize :: Word64
+burstSize = 4 -- allow 128 kb of data to be stored/retrieved w/o proof of work
+
+-- | Rate that the bucket is filled.
+fillRate :: Word64
+fillRate = 60000000 -- 1 token per minute
+
+-- A request is tried in each bucket, in turn, that its proof of work
+-- allows access to. If all accessible token buckets are empty, generate
+-- a new ProofOfWorkRequirement for the client.
+--
+-- If all buckets are tried and are empty, we must be very overloaded.
+-- In this case, the request is still processed, since the client has done
+-- quite a lot of work.
+rateLimit :: RateLimiter -> Maybe ProofOfWork -> StorableObjectIdent -> Handler a -> Handler (POWGuarded a)
+rateLimit ratelimiter mpow i a = do
+    validsalt <- liftIO $ checkValidSalt ratelimiter mpow
+    bs <- liftIO $ atomically $ readTMVar (buckets ratelimiter)
+    if validsalt
+        then go bs
+        else assignWork ratelimiter bs
+  where
+    go [] = do
+        liftIO $ hPutStrLn stderr "** warning: all token buckets are empty; possible DOS attack?"
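+        -- Process the request anyway; the client has already done
+        -- as much work as can reasonably be demanded.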
+        Result <$> a
+    go (b:bs) = case mkProofReq b of
+        Nothing -> checkbucket b bs
+        Just mkreq -> case mpow of
+            Nothing -> assignWork ratelimiter (b:bs)
+            Just pow@(ProofOfWork _ salt) ->
+                if isValidProofOfWork pow (mkreq salt) i
+                    then checkbucket b bs
+                    else assignWork ratelimiter (b:bs)
+    checkbucket b bs = do
+        allowed <- liftIO $ tokenBucketTryAlloc (tokenBucket b)
+            burstSize fillRate 1
+        if allowed
+            then Result <$> a
+            else go bs
+
+checkValidSalt :: RateLimiter -> Maybe ProofOfWork -> IO Bool
+checkValidSalt _ Nothing = return True
+checkValidSalt rl (Just (ProofOfWork _ salt)) = do
+    assigned <- iselem assignedRandomSalts
+    oldassigned <- iselem assignedRandomSaltsOld
+    used <- iselem usedRandomSalts
+    oldused <- iselem usedRandomSaltsOld
+    if assigned && not oldassigned && not used && not oldused
+        then do
+            withBloomFilter rl usedRandomSalts
+                (`BloomFilter.insert` salt)
+            return True
+        else return False
+  where
+    iselem f = withBloomFilter rl f (BloomFilter.elem salt)
+
+assignWork :: RateLimiter -> [Bucket] -> Handler (POWGuarded a)
+assignWork ratelimiter bs = case mapMaybe mkProofReq bs of
+    [] -> throwError err404
+    (mkreq:_) -> liftIO $ do
+        -- 10 second pause here, with a lock held so that only
+        -- this thread can run. This prevents an attacker
+        -- flooding requests that cause new random salts to be
+        -- assigned, in order to fill up the bloom table and cause
+        -- salts assigned to other clients to be rejected.
+        -- Since the bloom filters hold 1 million salts,
+        -- the attacker would need to send requests for over 10
+        -- hours to force a bloom filter rotation, so would not
+        -- impact many users.
+        atomically $ takeTMVar (accessLock ratelimiter)
+        threadDelay 10000000
+        atomically $ putTMVar (accessLock ratelimiter) ()
+
+        salt <- liftIO mkRandomSalt
+        withBloomFilter ratelimiter assignedRandomSalts
+            (`BloomFilter.insert` salt)
+        needrot <- atomically $ do
+            n <- takeTMVar (numRandomSalts ratelimiter)
+            if n > bloomMaxSize `div` 2
+                then return (Just n)
+                else do
+                    putTMVar (numRandomSalts ratelimiter) (n+1)
+                    return Nothing
+        handlerotation needrot
+        return $ NeedProofOfWork $ mkreq salt
+  where
+    handlerotation Nothing = return ()
+    handlerotation (Just n) = do
+        hPutStrLn stderr $ "rotating bloom filters after processing " ++ show n ++ " requests"
+        newassigned <- mkBloomFilter
+        newused <- mkBloomFilter
+        atomically $ do
+            oldassigned <- takeTMVar (assignedRandomSalts ratelimiter)
+            oldused <- takeTMVar (usedRandomSalts ratelimiter)
+            _ <- swapTMVar (assignedRandomSaltsOld ratelimiter) oldassigned
+            _ <- swapTMVar (usedRandomSaltsOld ratelimiter) oldused
+            putTMVar (assignedRandomSalts ratelimiter) =<< takeTMVar newassigned
+            putTMVar (usedRandomSalts ratelimiter) =<< takeTMVar newused
+            putTMVar (numRandomSalts ratelimiter) 0
+
+withBloomFilter
+    :: RateLimiter
+    -> (RateLimiter -> BloomFilter)
+    -> (BloomFilter.MBloom RealWorld RandomSalt -> ST RealWorld a)
+    -> IO a
+withBloomFilter rl field a = do
+    b <- atomically $ readTMVar (field rl)
+    stToIO (a b)

diff --git a/HTTP/Server.hs b/HTTP/Server.hs
index ab27aaa..dd35d1c 100644
--- a/HTTP/Server.hs
+++ b/HTTP/Server.hs
@@ -9,6 +9,7 @@ module HTTP.Server (runServer) where
 
 import HTTP
 import HTTP.ProofOfWork
+import HTTP.RateLimit
 import Types
 import Types.Storage
 import Tunables
@@ -26,12 +27,14 @@ import qualified Data.ByteString as B
 data ServerState = ServerState
    { obscurerRequest :: TMVar ()
    , storageDirectory :: Maybe LocalStorageDirectory
+    , rateLimiter :: RateLimiter
    }
 
 newServerState :: Maybe LocalStorageDirectory -> IO ServerState
 newServerState d = ServerState
    <$> newEmptyTMVarIO
    <*> pure d
+    <*> newRateLimiter
 
 runServer :: Maybe LocalStorageDirectory -> String -> Port -> IO ()
 runServer d bindaddress port = do
@@ -60,30 +63,30 @@ server st = motd
 motd :: Handler Motd
 motd = return $ Motd "Hello World!"
 
-getObject :: ServerState -> StorableObjectIdent -> Maybe ProofOfWork -> Handler (ProofOfWorkRequirement StorableObject)
-getObject st i _pow = do
+getObject :: ServerState -> StorableObjectIdent -> Maybe ProofOfWork -> Handler (POWGuarded StorableObject)
+getObject st i pow = rateLimit (rateLimiter st) pow i $ do
    r <- liftIO $ retrieveShare (serverStorage st) dummyShareNum i
    liftIO $ requestObscure st
    case r of
-        RetrieveSuccess (Share _n o) -> return $ Result o
+        RetrieveSuccess (Share _n o) -> return o
        RetrieveFailure _ -> throwError err404
 
-putObject :: ServerState -> StorableObjectIdent -> Maybe ProofOfWork -> StorableObject -> Handler (ProofOfWorkRequirement StoreResult)
-putObject st i _pow o = do
+putObject :: ServerState -> StorableObjectIdent -> Maybe ProofOfWork -> StorableObject -> Handler (POWGuarded StoreResult)
+putObject st i pow o = rateLimit (rateLimiter st) pow i $ do
    if validObjectsize o
        then do
            r <- liftIO $ storeShare (serverStorage st) i (Share dummyShareNum o)
            liftIO $ requestObscure st
-            return $ Result r
-        else return $ Result $ StoreFailure "invalid object size"
+            return r
+        else return $ StoreFailure "invalid object size"
 
 validObjectsize :: StorableObject -> Bool
 validObjectsize o = any (sz ==) knownObjectSizes
  where
    sz = B.length (fromStorableObject o)
 
-countObjects :: ServerState -> Maybe ProofOfWork -> Handler (ProofOfWorkRequirement CountResult)
-countObjects st _pow = liftIO $ Result <$> countShares (serverStorage st)
+countObjects :: ServerState -> Handler CountResult
+countObjects = liftIO . countShares . serverStorage
 
 -- | 1 is a dummy value; the server does not know the actual share numbers.
 dummyShareNum :: ShareNum

diff --git a/Storage/Network.hs b/Storage/Network.hs
index 24f1c7d..6053ff3 100644
--- a/Storage/Network.hs
+++ b/Storage/Network.hs
@@ -14,8 +14,10 @@ module Storage.Network (
 
 import Types
 import Types.Storage
+import Types.Cost
 import Data.List
 import Data.Char
+import HTTP
 import HTTP.Client
 import HTTP.ProofOfWork
 import Servant.Client
@@ -58,12 +60,12 @@ networkStorage server = Storage
 
 store :: Server -> StorableObjectIdent -> Share -> IO StoreResult
 store srv i (Share _n o) =
-    serverRequest srv StoreFailure id $ \pow ->
+    serverRequest srv StoreFailure id i $ \pow ->
        putObject i pow o
 
 retrieve :: Server -> ShareNum -> StorableObjectIdent -> IO RetrieveResult
 retrieve srv n i =
-    serverRequest srv RetrieveFailure (RetrieveSuccess . Share n) $
+    serverRequest srv RetrieveFailure (RetrieveSuccess . Share n) i $
        getObject i
 
 -- | Servers should automatically obscure, so do nothing.
@@ -72,7 +74,7 @@ obscure :: Server -> IO ObscureResult
 obscure _ = return ObscureSuccess
 
 count :: Server -> IO CountResult
-count srv = serverRequest srv CountFailure id countObjects
+count srv = either CountFailure id <$> serverRequest' srv countObjects
 
 -- | Not needed for servers.
 move :: Server -> Storage -> IO ()
@@ -82,23 +84,39 @@ serverRequest
    :: Server
    -> (String -> a)
    -> (r -> a)
-    -> (Maybe ProofOfWork -> Manager -> BaseUrl -> ExceptT ServantError IO (ProofOfWorkRequirement r))
+    -> StorableObjectIdent
+    -> (Maybe ProofOfWork -> Manager -> BaseUrl -> ExceptT ServantError IO (POWGuarded r))
    -> IO a
-serverRequest srv onerr onsuccess a =
+serverRequest srv onerr onsuccess i a =
+    go Nothing maxProofOfWork
+  where
+    go pow (Seconds timeleft)
+        | timeleft <= 0 = return $ onerr "server asked for too much proof of work; gave up"
+        | otherwise = do
+            res <- serverRequest' srv (a pow)
+            case res of
+                Left err -> return $ onerr err
+                Right (Result r) -> return $ onsuccess r
+                Right (NeedProofOfWork req) -> go
+                    (Just $ genProofOfWork req i)
+                    (Seconds timeleft - generationTime req)
+
+serverRequest'
+    :: Server
+    -> (Manager -> BaseUrl -> ExceptT ServantError IO r)
+    -> IO (Either String r)
+serverRequest' srv a = do
    -- A new Manager is allocated for each request, rather than reusing
    -- any connection. This is a feature; it makes correlation attacks
    -- harder because the server can't tell if two connections
-    -- (over tor) came from the same user.
-    go Nothing =<< torableManager
+    -- accessing different objects came from the same user, except by
+    -- comparing IP addresses (which are masked somewhat by using tor).
+    manager <- torableManager
+    res <- runExceptT $ a manager url
+    return $ case res of
+        Left err -> Left $ "server failure: " ++ show err
+        Right r -> Right r
  where
    url = serverUrl srv
-    go pow manager = do
-        res <- runExceptT $ a pow manager url
-        case res of
-            Left err -> return $ onerr $
-                "server failure: " ++ show err
-            Right (Result r) -> return $ onsuccess r
-            Right needpow -> error "NEEDPOW" -- loop with pow
 
 -- | HTTP Manager supporting tor .onion and regular hosts
 torableManager :: IO Manager

diff --git a/TODO b/TODO
index 1365efa..f35f882 100644
--- a/TODO
+++ b/TODO
@@ -1,6 +1,6 @@
 Soon:
 
-* client/server Proof Of Work
+* test client/server Proof Of Work
 * Add some random padding to http requests and responses, to make
   it harder for traffic analysis to tell that it's keysafe traffic.
 * Implement the different categories of servers in the server list.

diff --git a/Tunables.hs b/Tunables.hs
index d7e5ac7..1d087bf 100644
--- a/Tunables.hs
+++ b/Tunables.hs
@@ -9,6 +9,7 @@ module Tunables where
 
 import Cost
 import qualified Crypto.Argon2 as Argon2
+import Data.Word
 
 -- | To determine the tunables used for a key name the expensive hash of the
 -- name is calculated, using a particular configuration, and if the
@@ -139,3 +140,19 @@ knownObjectSizes :: [Int]
 knownObjectSizes = map (calc . snd) knownTunings
  where
    calc t = objectSize t * shareOverhead t
+
+-- Hash for client-server Proof Of Work. This is tuned to take around
+-- 4 seconds to calculate the hash on a 4 core machine, with 0 added
+-- iterations. Adding more iterations will increase that somewhat.
+--
+-- This is not included in Tunables because it doesn't affect object
+-- encryption and storage.
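+--
+-- For example, addedArgon2Iterations 20 doubles hashIterations to 40,
+-- and the estimated cost below becomes 4 + (4 * 20) `div` 20 = 8 seconds.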
+proofOfWorkHashTunable :: Word32 -> ExpensiveHashTunable
+proofOfWorkHashTunable addits =
+    UseArgon2 (CPUCost (Seconds (4 + (4 * fromIntegral addits `div` 20))) (Divisibility 4)) $
+        Argon2.HashOptions
+            { Argon2.hashIterations = 20 + addits
+            , Argon2.hashMemory = 131072 -- 128 mebibytes per thread
+            , Argon2.hashParallelism = 4
+            , Argon2.hashVariant = Argon2.Argon2i
+            }

diff --git a/keysafe.cabal b/keysafe.cabal
index 9b75141..b13ee88 100644
--- a/keysafe.cabal
+++ b/keysafe.cabal
@@ -60,10 +60,14 @@ Executable keysafe
    , stm == 2.4.*
    , socks == 0.5.*
    , network == 2.6.*
+    , token-bucket == 0.1.*
+    , bloomfilter == 2.0.*
  -- Temporarily inlined due to https://github.com/ocharles/argon2/issues/3
  -- argon2 == 1.1.*
  Extra-Libraries: argon2
  Other-Modules:
+    Benchmark
+    ByteStrings
    Crypto.Argon2.FFI
    Crypto.Argon2
    CmdLine
@@ -76,6 +80,7 @@ Executable keysafe
    HTTP.Client
    HTTP.ProofOfWork
    HTTP.Server
+    HTTP.RateLimit
    SecretKey
    Serialization
    Share

diff --git a/keysafe.hs b/keysafe.hs
index 569e678..98e1ecb 100644
--- a/keysafe.hs
+++ b/keysafe.hs
@@ -13,7 +13,7 @@ import qualified CmdLine
 import UI
 import Encryption
 import Entropy
-import ExpensiveHash
+import Benchmark
 import Tests
 import Cost
 import SecretKey

diff --git a/stack.yaml b/stack.yaml
index 82ac935..639d7e1 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -11,4 +11,5 @@ extra-deps:
 - servant-0.7.1
 - servant-server-0.7.1
 - servant-client-0.7.1
+- token-bucket-0.1.0.1
 explicit-setup-deps:
-- 
cgit v1.2.3