From 68eb14fdf6debf1e26921a1b2dddf34dbd031471 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Tue, 13 Sep 2016 22:15:18 -0400
Subject: use less expensive hash for proof of work

The server has to run the hash once to verify a request, so a hash that
took 4 seconds could make the server do too much work if it's being
flooded with requests. So, made the hash much less expensive.

This required keeping track of fractional seconds. Actually, I used
Rational for them, to avoid most rounding problems. That turned out nice.

I've only tuned the proofOfWorkHashTunable on my fanless overheating
laptop so far. It seems to be fairly reasonably tuned though.
---
 Benchmark.hs        | 55 ++++++++++++++++++++++++++++++++++++++++++++---------
 Cost.hs             | 13 +++++++------
 HTTP/ProofOfWork.hs | 25 ++++++++++++++++--------
 HTTP/RateLimit.hs   |  2 +-
 Tunables.hs         | 16 ++++++++++------
 Types.hs            |  3 ---
 Types/Cost.hs       |  2 +-
 7 files changed, 82 insertions(+), 34 deletions(-)

diff --git a/Benchmark.hs b/Benchmark.hs
index 21b7ce3..a46e9f4 100644
--- a/Benchmark.hs
+++ b/Benchmark.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE OverloadedStrings, FlexibleInstances #-}
 {- Copyright 2016 Joey Hess
  -
@@ -10,6 +10,7 @@ module Benchmark where
 import Types
 import Tunables
 import ExpensiveHash
+import HTTP.ProofOfWork
 import Cost
 import Serialization ()
 import qualified Data.ByteString.UTF8 as BU8
@@ -20,6 +21,15 @@
 import Control.Monad
 import Data.Monoid
 import Data.Maybe
 
+data BenchmarkResult t = BenchmarkResult { expectedBenchmark :: t, actualBenchmark :: t }
+
+instance Show (BenchmarkResult (Cost op)) where
+	show br = " expected: " ++ showtime (expectedBenchmark br) ++ "s"
+		++ "\tactual: " ++ showtime (actualBenchmark br) ++ "s"
+	  where
+		showtime (CPUCost (Seconds s) _) =
+			show (fromRational s :: Double)
+
 benchmarkExpensiveHash :: Int -> ExpensiveHashTunable -> IO (BenchmarkResult (Cost CreationOp))
 benchmarkExpensiveHash rounds tunables =
 	benchmarkExpensiveHash' rounds tunables (getexpected tunables)
@@ -40,36 +50,63 @@ benchmarkExpensiveHash' rounds tunables@(UseArgon2 _ hashopts) expected = do
 			(base <> "himom")
 		t `deepseq` return ()
 	end <- getCurrentTime
-	let diff = floor $ end `diffUTCTime` start
+	let diff = floor (end `diffUTCTime` start) :: Integer
 	let maxthreads = Argon2.hashParallelism hashopts
-	let actual = CPUCost (Seconds diff) (Divisibility $ fromIntegral maxthreads)
+	let actual = CPUCost (Seconds (fromIntegral diff))
+		(Divisibility $ fromIntegral maxthreads)
 	-- The expected cost is for a single core, so adjust it
 	-- based on the number of cores, up to a maximum of the number
 	-- of threads that the hash is configured to use.
 	let usedcores = min maxthreads numcores
-	let adjustedexpected = mapCost (`div` fromIntegral usedcores) expected
+	let adjustedexpected = mapCost (/ fromIntegral usedcores) expected
 	return $ BenchmarkResult
 		{ expectedBenchmark = adjustedexpected
 		, actualBenchmark = actual
 		}
 
+benchmark :: NFData t => Int -> Cost CreationOp -> (Int -> IO t) -> IO (BenchmarkResult (Cost CreationOp))
+benchmark rounds expected a = do
+	start <- getCurrentTime
+	forM_ [1..rounds] $ \n -> do
+		v <- a n
+		v `deepseq` return ()
+	end <- getCurrentTime
+	let diff = floor (end `diffUTCTime` start) :: Integer
+	return $ BenchmarkResult
+		{ expectedBenchmark = expected
+		, actualBenchmark = CPUCost (Seconds (fromIntegral diff)) (Divisibility 1)
+		}
+
+benchmarkPoW :: Int -> Seconds -> IO (BenchmarkResult (Cost CreationOp))
+benchmarkPoW rounds seconds = do
+	let Just mk = mkProofOfWorkRequirement seconds
+	s <- newRequestIDSecret
+	rid <- mkRequestID s
+	benchmark rounds (CPUCost (seconds * Seconds (fromIntegral rounds)) (Divisibility 1))
+		(return . genProofOfWork (mk rid))
+
 benchmarkTunables :: Tunables -> IO ()
 benchmarkTunables tunables = do
 	putStrLn "/proc/cpuinfo:"
 	putStrLn =<< readFile "/proc/cpuinfo"
 
-	putStrLn $ "Benchmarking 16 rounds of proof of work hash..."
-	print =<< benchmarkExpensiveHash 16 (proofOfWorkHashTunable 0)
+	putStrLn "Benchmarking 1000 rounds of proof of work hash..."
+	print =<< benchmarkExpensiveHash 1000 (proofOfWorkHashTunable 0)
+
+	putStrLn "Benchmarking 60 rounds of 1 second proofs of work..."
+	print =<< benchmarkPoW 60 (Seconds 1)
+
+	putStrLn "Benchmarking 10 rounds of 8 second proofs of work..."
+	print =<< benchmarkPoW 10 (Seconds 8)
 
 	-- Rather than run all 256 rounds of this hash, which would
 	-- probably take on the order of 1 hour, run only 16, and scale
 	-- the expected cost accordingly.
-	let normalrounds = fromIntegral $
-		256 * randomSaltBytes (keyEncryptionKeyTunable tunables)
+	let normalrounds = 256 * randomSaltBytes (keyEncryptionKeyTunable tunables)
 	putStrLn $ "Benchmarking 16/" ++ show normalrounds ++ " rounds of key encryption key hash..."
 	r <- benchmarkExpensiveHash' 16
 		(keyEncryptionKeyHash $ keyEncryptionKeyTunable tunables)
-		(mapCost (`div` (normalrounds `div` 16)) $ randomSaltBytesBruteForceCost $ keyEncryptionKeyTunable tunables)
+		(mapCost (/ (fromIntegral normalrounds / 16)) $ randomSaltBytesBruteForceCost $ keyEncryptionKeyTunable tunables)
 	print r
 	putStrLn $ "Estimated time for " ++ show normalrounds ++ " rounds of key encryption key hash..."
 	print $ BenchmarkResult
diff --git a/Cost.hs b/Cost.hs
index 77c2c4c..dc2438e 100644
--- a/Cost.hs
+++ b/Cost.hs
@@ -22,7 +22,7 @@ totalCost (CPUCost s _) = (s, [UsingCPU])
 raiseCostPower :: Cost c -> Entropy e -> Cost c
 raiseCostPower c (Entropy e) = mapCost (* 2^e) c
 
-mapCost :: (Integer -> Integer) -> Cost op -> Cost op
+mapCost :: (Rational -> Rational) -> Cost op -> Cost op
 mapCost f (CPUCost (Seconds n) d) = CPUCost (Seconds (f n)) d
 
 type NumCores = Integer
@@ -30,14 +30,15 @@ type NumCores = Integer
 showCostMinutes :: NumCores -> Cost op -> String
 showCostMinutes numcores (CPUCost (Seconds n) (Divisibility d))
 	| n' < 61 = "1 minute"
-	| otherwise = show (n' `div` 60) ++ " minutes"
+	| otherwise = show (n' / 60) ++ " minutes"
   where
-	n' = n `div` min numcores d
+	n' :: Double
+	n' = fromRational n / fromIntegral (min numcores d)
 
 -- If an operation took n seconds on a number of cores,
 -- multiply to get the CPUCost, which is for a single core.
 coreCost :: NumCores -> Seconds -> Divisibility -> Cost op
-coreCost cores (Seconds n) d = CPUCost (Seconds (cores * n)) d
+coreCost cores (Seconds n) d = CPUCost (Seconds (fromIntegral cores * n)) d
 
 castCost :: Cost a -> Cost b
 castCost (CPUCost s d) = CPUCost s d
@@ -82,11 +83,11 @@
 estimateAttackCost :: DataCenterPrice -> Cost BruteForceOp -> Dollars
 estimateAttackCost dc opcost = centsToDollars $ costcents
   where
 	(Seconds cpuseconds) = fst (totalCost opcost)
-	cpuyears = cpuseconds `div` (60*60*24*365)
+	cpuyears = cpuseconds / (60*60*24*365)
 	costpercpuyear = Cents $
 		fromIntegral (instanceCostPerHour dc) * 24 * 365
 			`div` (instanceCpuCores dc * instanceCpuCoreMultiplier dc)
-	costcents = Cents cpuyears * costpercpuyear
+	costcents = Cents (ceiling cpuyears) * costpercpuyear
 
 newtype Cents = Cents Integer
 	deriving (Num, Integral, Enum, Real, Ord, Eq, Show)
diff --git a/HTTP/ProofOfWork.hs b/HTTP/ProofOfWork.hs
index 476ba87..35b5ffd 100644
--- a/HTTP/ProofOfWork.hs
+++ b/HTTP/ProofOfWork.hs
@@ -21,6 +21,7 @@
 import Raaz.Core.Encode
 import qualified Raaz
 import Data.BloomFilter.Hash
 import Control.Monad
+import Control.DeepSeq
 import Data.Word
 import Data.Bits
 import Data.Monoid
@@ -28,7 +29,9 @@ import Prelude
 
 -- | A value that the client has to do some work to calculate.
 data ProofOfWork = ProofOfWork B.ByteString RequestID
-	deriving (Show)
+	deriving (Show, Generic)
+
+instance NFData ProofOfWork
 
 data ProofOfWorkRequirement = ProofOfWorkRequirement
 	{ leadingZeros :: Int
@@ -45,10 +48,7 @@ data RequestID = RequestID
 	}
 	deriving (Generic, Show, Eq)
 
--- | Using Text and not ByteString so that ProofOfWorkRequirement can have a
--- JSON instance.
-newtype RandomSalt = RandomSalt { fromRandomSalt :: T.Text }
-	deriving (Generic, Show, Eq)
+instance NFData RequestID
 
 instance Hashable RequestID where
 	hashIO32 = hashIO32 . hashRequestID
@@ -58,6 +58,13 @@ hashRequestID :: RequestID -> B.ByteString
 hashRequestID rid = encodeUtf8 (fromRandomSalt (randomSalt rid))
 	<> ":" <> encodeUtf8 (requestHMAC rid)
 
+-- | Using Text and not ByteString so that ProofOfWorkRequirement can have a
+-- JSON instance.
+newtype RandomSalt = RandomSalt { fromRandomSalt :: T.Text }
+	deriving (Generic, Show, Eq)
+
+instance NFData RandomSalt
+
 -- | Servers should never demand a proof of work that takes longer than
 -- this to generate. Note that if a server changes its mind and doubles
 -- the proof of work, a client counts that cumulatively. So, a server
@@ -82,7 +89,7 @@ mkProofOfWorkRequirement (Seconds n)
 	| lz < 1 = Nothing
 	| otherwise = Just $ ProofOfWorkRequirement lz its
   where
-	lz = floor (logBase 2 (max 1 (fromIntegral n / fromIntegral s)) :: Double)
+	lz = floor (logBase 2 (fromRational (max 1 (n / s))) :: Double)
 	UseArgon2 (CPUCost (Seconds s) _) _ = proofOfWorkHashTunable its
 	its = 0
@@ -125,10 +132,12 @@ data NoPOWIdent = NoPOWIdent
 
 instance POWIdent NoPOWIdent where
 	getPOWIdent NoPOWIdent = B.empty
 
+instance POWIdent Int where
+	getPOWIdent = encodeUtf8 . T.pack . show
+
 -- Note that this does not check validRequestID.
 isValidProofOfWork :: POWIdent p => ProofOfWork -> ProofOfWorkRequirement -> p -> Bool
-isValidProofOfWork (ProofOfWork pow rid) req p =
-	samerequestids && enoughzeros
+isValidProofOfWork (ProofOfWork pow rid) req p = samerequestids && enoughzeros
   where
 	samerequestids = rid == requestID req
 	enoughzeros = all (== False) (take (leadingZeros req) (setBits b))
diff --git a/HTTP/RateLimit.hs b/HTTP/RateLimit.hs
index 45c6b9a..39d7dbc 100644
--- a/HTTP/RateLimit.hs
+++ b/HTTP/RateLimit.hs
@@ -104,7 +104,7 @@ newRateLimiter cfg storedir logger = do
 			<*> pure minFillInterval
 		return (b:bs)
 
-	sdiv (Seconds n) d = Seconds (n `div` d)
+	sdiv (Seconds n) d = Seconds (n / d)
 
 mkBloomFilter :: IO BloomFilter
 mkBloomFilter = do
diff --git a/Tunables.hs b/Tunables.hs
index 1d087bf..5c28a39 100644
--- a/Tunables.hs
+++ b/Tunables.hs
@@ -142,17 +142,21 @@ knownObjectSizes = map (calc . snd) knownTunings
 	calc t = objectSize t * shareOverhead t
 
 -- Hash for client-server Proof Of Work. This is tuned to take around
--- 4 seconds to calculate the hash on a 4 core machine, with 0 added
--- iterations. Adding more iterations will increase that somewhat.
+-- 4 seconds to calculate the hash 256 times on a 4 core machine, with
+-- 0 added iterations. Adding more iterations will increase that somewhat.
 --
 -- This is not included in Tunables because it doesn't affect object
--- encryption and storage.
+-- encryption and storage. Any change to this will break backwards
+-- compatibility of the HTTP protocol!
 proofOfWorkHashTunable :: Word32 -> ExpensiveHashTunable
 proofOfWorkHashTunable addits =
-	UseArgon2 (CPUCost (Seconds (4 + (4 * fromIntegral addits `div` 20))) (Divisibility 4)) $
+	UseArgon2 (CPUCost (Seconds nsecs) (Divisibility 4)) $
 		Argon2.HashOptions
-			{ Argon2.hashIterations = 20 + addits
-			, Argon2.hashMemory = 131072 -- 128 mebibytes per thread
+			{ Argon2.hashIterations = nits
+			, Argon2.hashMemory = 1000
 			, Argon2.hashParallelism = 4
 			, Argon2.hashVariant = Argon2.Argon2i
 			}
+  where
+	nits = 20 + addits
+	nsecs = (4 * fromIntegral nits / 20) / 256
diff --git a/Types.hs b/Types.hs
index e129ea3..2ab5d6c 100644
--- a/Types.hs
+++ b/Types.hs
@@ -61,6 +61,3 @@ data SecretKeySource = GpgKey KeyId | KeyFile FilePath
 -- A gpg keyid is the obvious example.
 data KeyId = KeyId B.ByteString
 	deriving (Show)
-
-data BenchmarkResult t = BenchmarkResult { expectedBenchmark :: t, actualBenchmark :: t }
-	deriving (Show)
diff --git a/Types/Cost.hs b/Types/Cost.hs
index 372c43d..4b71116 100644
--- a/Types/Cost.hs
+++ b/Types/Cost.hs
@@ -13,7 +13,7 @@ data Cost op
 	-- ^ cost in Seconds, using 1 physical CPU core
 	deriving (Show, Eq, Ord)
 
-newtype Seconds = Seconds Integer
+newtype Seconds = Seconds Rational
 	deriving (Num, Eq, Ord, Show)
 
 -- | How many CPU cores a single run of an operation can be divided among.
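
Illustration (not part of the commit): a minimal self-contained Haskell
sketch of why Seconds had to become Rational once a single hash costs a
fraction of a second, and of how the leading-zeros requirement follows
from that per-hash cost. The names perHashCost and leadingZerosFor are
hypothetical, chosen for this example; the Seconds newtype matches
Types/Cost.hs after this patch, and the lz formula mirrors
mkProofOfWorkRequirement above.

import Data.Ratio ((%))

newtype Seconds = Seconds Rational
	deriving (Show, Eq, Ord)

-- The retuned hash targets 4 seconds per 256 runs, so one run costs
-- exactly 4/256 = 1/64 second. As an Integer number of seconds this
-- would floor to 0; as a Rational it stays exact.
perHashCost :: Seconds
perHashCost = Seconds (4 % 256)

-- Each demanded leading zero bit doubles the expected number of hash
-- attempts, so an n-second proof of work needs
-- floor (log2 (n / perhash)) zero bits, and below one bit no proof
-- of work is required at all.
leadingZerosFor :: Seconds -> Seconds -> Maybe Int
leadingZerosFor (Seconds n) (Seconds s)
	| lz < 1 = Nothing
	| otherwise = Just lz
  where
	lz = floor (logBase 2 (fromRational (max 1 (n / s))) :: Double)

main :: IO ()
main = do
	print perHashCost                                -- Seconds (1 % 64)
	print (leadingZerosFor (Seconds 1) perHashCost)  -- Just 6
	print (leadingZerosFor (Seconds 8) perHashCost)  -- Just 9

Verification stays cheap for the server: as isValidProofOfWork above
shows, it hashes the proof once and checks that the first leadingZeros
bits are all clear.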