From 68eb14fdf6debf1e26921a1b2dddf34dbd031471 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Tue, 13 Sep 2016 22:15:18 -0400
Subject: use less expensive hash for proof of work

The server has to run the hash once to verify a request, so a hash that
took 4 seconds could make the server do too much work if it's being
flooded with requests. So, made the hash much less expensive.

This required keeping track of fractional seconds. Actually, I used
Rational for them, to avoid most rounding problems. That turned out nice.

I've only tuned the proofOfWorkHashTunable on my fanless overheating
laptop so far. It seems to be fairly reasonably tuned though.
---
 Benchmark.hs        | 55 ++++++++++++++++++++++++++++++++++++++++++++---------
 Cost.hs             | 13 +++++++------
 HTTP/ProofOfWork.hs | 25 ++++++++++++++++--------
 HTTP/RateLimit.hs   |  2 +-
 Tunables.hs         | 16 ++++++++++------
 Types.hs            |  3 ---
 Types/Cost.hs       |  2 +-
 7 files changed, 82 insertions(+), 34 deletions(-)

diff --git a/Benchmark.hs b/Benchmark.hs
index 21b7ce3..a46e9f4 100644
--- a/Benchmark.hs
+++ b/Benchmark.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE OverloadedStrings, FlexibleInstances #-}
 {- Copyright 2016 Joey Hess
  -
@@ -10,6 +10,7 @@ module Benchmark where
 import Types
 import Tunables
 import ExpensiveHash
+import HTTP.ProofOfWork
 import Cost
 import Serialization ()
 import qualified Data.ByteString.UTF8 as BU8
@@ -20,6 +21,15 @@
 import Control.Monad
 import Data.Monoid
 import Data.Maybe
 
+data BenchmarkResult t = BenchmarkResult { expectedBenchmark :: t, actualBenchmark :: t }
+
+instance Show (BenchmarkResult (Cost op)) where
+	show br = " expected: " ++ showtime (expectedBenchmark br) ++ "s"
+		++ "\tactual: " ++ showtime (actualBenchmark br) ++ "s"
+	  where
+		showtime (CPUCost (Seconds s) _) =
+			show (fromRational s :: Double)
+
 benchmarkExpensiveHash :: Int -> ExpensiveHashTunable -> IO (BenchmarkResult (Cost CreationOp))
 benchmarkExpensiveHash rounds tunables =
 	benchmarkExpensiveHash' rounds tunables (getexpected tunables)
@@ -40,36 +50,63 @@ benchmarkExpensiveHash' rounds tunables@(UseArgon2 _ hashopts) expected = do
 			(base <> "himom")
 		t `deepseq` return ()
 	end <- getCurrentTime
-	let diff = floor $ end `diffUTCTime` start
+	let diff = floor (end `diffUTCTime` start) :: Integer
 	let maxthreads = Argon2.hashParallelism hashopts
-	let actual = CPUCost (Seconds diff) (Divisibility $ fromIntegral maxthreads)
+	let actual = CPUCost (Seconds (fromIntegral diff))
+		(Divisibility $ fromIntegral maxthreads)
 	-- The expected cost is for a single core, so adjust it
 	-- based on the number of cores, up to a maximum of the number
 	-- of threads that the hash is configured to use.
 	let usedcores = min maxthreads numcores
-	let adjustedexpected = mapCost (`div` fromIntegral usedcores) expected
+	let adjustedexpected = mapCost (/ fromIntegral usedcores) expected
 	return $ BenchmarkResult
 		{ expectedBenchmark = adjustedexpected
 		, actualBenchmark = actual
 		}
 
+benchmark :: NFData t => Int -> Cost CreationOp -> (Int -> IO t) -> IO (BenchmarkResult (Cost CreationOp))
+benchmark rounds expected a = do
+	start <- getCurrentTime
+	forM_ [1..rounds] $ \n -> do
+		v <- a n
+		v `deepseq` return ()
+	end <- getCurrentTime
+	let diff = floor (end `diffUTCTime` start) :: Integer
+	return $ BenchmarkResult
+		{ expectedBenchmark = expected
+		, actualBenchmark = CPUCost (Seconds (fromIntegral diff)) (Divisibility 1)
+		}
+
+benchmarkPoW :: Int -> Seconds -> IO (BenchmarkResult (Cost CreationOp))
+benchmarkPoW rounds seconds = do
+	let Just mk = mkProofOfWorkRequirement seconds
+	s <- newRequestIDSecret
+	rid <- mkRequestID s
+	benchmark rounds (CPUCost (seconds * Seconds (fromIntegral rounds)) (Divisibility 1))
+		(return . genProofOfWork (mk rid))
+
 benchmarkTunables :: Tunables -> IO ()
 benchmarkTunables tunables = do
 	putStrLn "/proc/cpuinfo:"
 	putStrLn =<< readFile "/proc/cpuinfo"
 
-	putStrLn $ "Benchmarking 16 rounds of proof of work hash..."
-	print =<< benchmarkExpensiveHash 16 (proofOfWorkHashTunable 0)
+	putStrLn "Benchmarking 1000 rounds of proof of work hash..."
+	print =<< benchmarkExpensiveHash 1000 (proofOfWorkHashTunable 0)
+
+	putStrLn "Benchmarking 60 rounds of 1 second proofs of work..."
+	print =<< benchmarkPoW 60 (Seconds 1)
+
+	putStrLn "Benchmarking 10 rounds of 8 second proofs of work..."
+	print =<< benchmarkPoW 10 (Seconds 8)
 
 	-- Rather than run all 256 rounds of this hash, which would
 	-- probably take on the order of 1 hour, run only 16, and scale
 	-- the expected cost accordingly.
-	let normalrounds = fromIntegral $
-		256 * randomSaltBytes (keyEncryptionKeyTunable tunables)
+	let normalrounds = 256 * randomSaltBytes (keyEncryptionKeyTunable tunables)
 	putStrLn $ "Benchmarking 16/" ++ show normalrounds ++ " rounds of key encryption key hash..."
 	r <- benchmarkExpensiveHash' 16
 		(keyEncryptionKeyHash $ keyEncryptionKeyTunable tunables)
-		(mapCost (`div` (normalrounds `div` 16)) $ randomSaltBytesBruteForceCost $ keyEncryptionKeyTunable tunables)
+		(mapCost (/ (fromIntegral normalrounds / 16)) $ randomSaltBytesBruteForceCost $ keyEncryptionKeyTunable tunables)
 	print r
 	putStrLn $ "Estimated time for " ++ show normalrounds ++ " rounds of key encryption key hash..."
 	print $ BenchmarkResult
diff --git a/Cost.hs b/Cost.hs
index 77c2c4c..dc2438e 100644
--- a/Cost.hs
+++ b/Cost.hs
@@ -22,7 +22,7 @@ totalCost (CPUCost s _) = (s, [UsingCPU])
 raiseCostPower :: Cost c -> Entropy e -> Cost c
 raiseCostPower c (Entropy e) = mapCost (* 2^e) c
 
-mapCost :: (Integer -> Integer) -> Cost op -> Cost op
+mapCost :: (Rational -> Rational) -> Cost op -> Cost op
 mapCost f (CPUCost (Seconds n) d) = CPUCost (Seconds (f n)) d
 
 type NumCores = Integer
@@ -30,14 +30,15 @@ type NumCores = Integer
 showCostMinutes :: NumCores -> Cost op -> String
 showCostMinutes numcores (CPUCost (Seconds n) (Divisibility d))
 	| n' < 61 = "1 minute"
-	| otherwise = show (n' `div` 60) ++ " minutes"
+	| otherwise = show (n' / 60) ++ " minutes"
   where
-	n' = n `div` min numcores d
+	n' :: Double
+	n' = fromRational n / fromIntegral (min numcores d)
 
 -- If an operation took n seconds on a number of cores,
 -- multiply to get the CPUCost, which is for a single core.
 coreCost :: NumCores -> Seconds -> Divisibility -> Cost op
-coreCost cores (Seconds n) d = CPUCost (Seconds (cores * n)) d
+coreCost cores (Seconds n) d = CPUCost (Seconds (fromIntegral cores * n)) d
 
 castCost :: Cost a -> Cost b
 castCost (CPUCost s d) = CPUCost s d
@@ -82,11 +83,11 @@
 estimateAttackCost :: DataCenterPrice -> Cost BruteForceOp -> Dollars
 estimateAttackCost dc opcost = centsToDollars $ costcents
   where
 	(Seconds cpuseconds) = fst (totalCost opcost)
-	cpuyears = cpuseconds `div` (60*60*24*365)
+	cpuyears = cpuseconds / (60*60*24*365)
 	costpercpuyear = Cents $
 		fromIntegral (instanceCostPerHour dc) * 24 * 365
 			`div` (instanceCpuCores dc * instanceCpuCoreMultiplier dc)
-	costcents = Cents cpuyears * costpercpuyear
+	costcents = Cents (ceiling cpuyears) * costpercpuyear
 
 newtype Cents = Cents Integer
 	deriving (Num, Integral, Enum, Real, Ord, Eq, Show)
diff --git a/HTTP/ProofOfWork.hs b/HTTP/ProofOfWork.hs
index 476ba87..35b5ffd 100644
--- a/HTTP/ProofOfWork.hs
+++ b/HTTP/ProofOfWork.hs
@@ -21,6 +21,7 @@
 import Raaz.Core.Encode
 import qualified Raaz
 import Data.BloomFilter.Hash
 import Control.Monad
+import Control.DeepSeq
 import Data.Word
 import Data.Bits
 import Data.Monoid
@@ -28,7 +29,9 @@ import Prelude
 
 -- | A value that the client has to do some work to calculate.
 data ProofOfWork = ProofOfWork B.ByteString RequestID
-	deriving (Show)
+	deriving (Show, Generic)
+
+instance NFData ProofOfWork
 
 data ProofOfWorkRequirement = ProofOfWorkRequirement
 	{ leadingZeros :: Int
@@ -45,10 +48,7 @@ data RequestID = RequestID
 	}
 	deriving (Generic, Show, Eq)
 
--- | Using Text and not ByteString so that ProofOfWorkRequirement can have a
--- JSON instance.
-newtype RandomSalt = RandomSalt { fromRandomSalt :: T.Text }
-	deriving (Generic, Show, Eq)
+instance NFData RequestID
 
 instance Hashable RequestID where
 	hashIO32 = hashIO32 . hashRequestID
@@ -58,6 +58,13 @@ hashRequestID :: RequestID -> B.ByteString
 hashRequestID rid = encodeUtf8 (fromRandomSalt (randomSalt rid))
 	<> ":" <> encodeUtf8 (requestHMAC rid)
 
+-- | Using Text and not ByteString so that ProofOfWorkRequirement can have a
+-- JSON instance.
+newtype RandomSalt = RandomSalt { fromRandomSalt :: T.Text }
+	deriving (Generic, Show, Eq)
+
+instance NFData RandomSalt
+
 -- | Servers should never demand a proof of work that takes longer than
 -- this to generate. Note that if a server changes its mind and doubles
 -- the proof of work, a client counts that cumulatively. So, a server
@@ -82,7 +89,7 @@ mkProofOfWorkRequirement (Seconds n)
 	| lz < 1 = Nothing
 	| otherwise = Just $ ProofOfWorkRequirement lz its
   where
-	lz = floor (logBase 2 (max 1 (fromIntegral n / fromIntegral s)) :: Double)
+	lz = floor (logBase 2 (fromRational (max 1 (n / s))) :: Double)
 	UseArgon2 (CPUCost (Seconds s) _) _ = proofOfWorkHashTunable its
 	its = 0
@@ -125,10 +132,12 @@ data NoPOWIdent = NoPOWIdent
 
 instance POWIdent NoPOWIdent where
 	getPOWIdent NoPOWIdent = B.empty
 
+instance POWIdent Int where
+	getPOWIdent = encodeUtf8 . T.pack . show
+
 -- Note that this does not check validRequestID.
 isValidProofOfWork :: POWIdent p => ProofOfWork -> ProofOfWorkRequirement -> p -> Bool
-isValidProofOfWork (ProofOfWork pow rid) req p =
-	samerequestids && enoughzeros
+isValidProofOfWork (ProofOfWork pow rid) req p = samerequestids && enoughzeros
   where
 	samerequestids = rid == requestID req
 	enoughzeros = all (== False) (take (leadingZeros req) (setBits b))
diff --git a/HTTP/RateLimit.hs b/HTTP/RateLimit.hs
index 45c6b9a..39d7dbc 100644
--- a/HTTP/RateLimit.hs
+++ b/HTTP/RateLimit.hs
@@ -104,7 +104,7 @@ newRateLimiter cfg storedir logger = do
 			<*> pure minFillInterval
 		return (b:bs)
 
-	sdiv (Seconds n) d = Seconds (n `div` d)
+	sdiv (Seconds n) d = Seconds (n / d)
 
 mkBloomFilter :: IO BloomFilter
 mkBloomFilter = do
diff --git a/Tunables.hs b/Tunables.hs
index 1d087bf..5c28a39 100644
--- a/Tunables.hs
+++ b/Tunables.hs
@@ -142,17 +142,21 @@ knownObjectSizes = map (calc . snd) knownTunings
 	calc t = objectSize t * shareOverhead t
 
 -- Hash for client-server Proof Of Work. This is tuned to take around
--- 4 seconds to calculate the hash on a 4 core machine, with 0 added
--- iterations. Adding more iterations will increase that somewhat.
+-- 4 seconds to calculate the hash 256 times on a 4 core machine, with
+-- 0 added iterations. Adding more iterations will increase that somewhat.
 --
 -- This is not included in Tunables because it doesn't affect object
--- encryption and storage.
+-- encryption and storage. Any change to this will break backwards
+-- compatibility of the HTTP protocol!
 proofOfWorkHashTunable :: Word32 -> ExpensiveHashTunable
 proofOfWorkHashTunable addits =
-	UseArgon2 (CPUCost (Seconds (4 + (4 * fromIntegral addits `div` 20))) (Divisibility 4)) $
+	UseArgon2 (CPUCost (Seconds nsecs) (Divisibility 4)) $
 		Argon2.HashOptions
-			{ Argon2.hashIterations = 20 + addits
-			, Argon2.hashMemory = 131072 -- 128 mebibytes per thread
+			{ Argon2.hashIterations = nits
+			, Argon2.hashMemory = 1000
 			, Argon2.hashParallelism = 4
 			, Argon2.hashVariant = Argon2.Argon2i
 			}
+  where
+	nits = 20 + addits
+	nsecs = (4 * fromIntegral nits / 20) / 256
diff --git a/Types.hs b/Types.hs
index e129ea3..2ab5d6c 100644
--- a/Types.hs
+++ b/Types.hs
@@ -61,6 +61,3 @@ data SecretKeySource = GpgKey KeyId | KeyFile FilePath
 -- A gpg keyid is the obvious example.
 data KeyId = KeyId B.ByteString
 	deriving (Show)
-
-data BenchmarkResult t = BenchmarkResult { expectedBenchmark :: t, actualBenchmark :: t }
-	deriving (Show)
diff --git a/Types/Cost.hs b/Types/Cost.hs
index 372c43d..4b71116 100644
--- a/Types/Cost.hs
+++ b/Types/Cost.hs
@@ -13,7 +13,7 @@ data Cost op
 	-- ^ cost in Seconds, using 1 physical CPU core
 	deriving (Show, Eq, Ord)
 
-newtype Seconds = Seconds Integer
+newtype Seconds = Seconds Rational
 	deriving (Num, Eq, Ord, Show)
 
 -- | How many CPU cores a single run of an operation can be divided among.
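
Illustration (not part of the commit): a minimal self-contained Haskell
sketch of why Seconds had to become Rational once a single hash costs a
fraction of a second, and of how the leading-zeros requirement follows
from that per-hash cost. The names perHashCost and leadingZerosFor are
hypothetical, chosen for this example; the Seconds newtype matches
Types/Cost.hs after this patch, and the lz formula mirrors
mkProofOfWorkRequirement above.

import Data.Ratio ((%))

newtype Seconds = Seconds Rational
	deriving (Show, Eq, Ord)

-- The retuned hash targets 4 seconds per 256 runs, so one run costs
-- exactly 4/256 = 1/64 second. As an Integer number of seconds this
-- would floor to 0; as a Rational it stays exact.
perHashCost :: Seconds
perHashCost = Seconds (4 % 256)

-- Each demanded leading zero bit doubles the expected number of hash
-- attempts, so an n-second proof of work needs
-- floor (log2 (n / perhash)) zero bits, and below one bit no proof
-- of work is required at all.
leadingZerosFor :: Seconds -> Seconds -> Maybe Int
leadingZerosFor (Seconds n) (Seconds s)
	| lz < 1 = Nothing
	| otherwise = Just lz
  where
	lz = floor (logBase 2 (fromRational (max 1 (n / s))) :: Double)

main :: IO ()
main = do
	print perHashCost                                -- Seconds (1 % 64)
	print (leadingZerosFor (Seconds 1) perHashCost)  -- Just 6
	print (leadingZerosFor (Seconds 8) perHashCost)  -- Just 9

Verification stays cheap for the server: as isValidProofOfWork above
shows, it hashes the proof once and checks that the first leadingZeros
bits are all clear.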