{- Copyright 2016 Joey Hess
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

module HTTP.RateLimit where

import Types.Cost
import HTTP
import HTTP.ProofOfWork
import Servant
import Control.Concurrent.STM
import Control.Concurrent.TokenBucket
import qualified Data.BloomFilter.Mutable as BloomFilter
import qualified Data.BloomFilter.Hash as BloomFilter
import Data.BloomFilter.Easy (suggestSizing)
import Control.Monad.ST
import System.IO
import Data.Maybe
import Data.Word
import Control.Monad.IO.Class

-- | A rate limiter is a series of buckets. Each bucket has a
-- successively more difficult proof of work access requirement.
--
-- To guard against DOS attacks that reuse the same proof of work,
-- RandomSalt values are used, and bloom filters keep track of
-- the ones that have been assigned and used.
data RateLimiter = RateLimiter
    { buckets :: TMVar [Bucket]
    , assignedRandomSalts :: BloomFilter
    , assignedRandomSaltsOld :: BloomFilter
    , usedRandomSalts :: BloomFilter
    , usedRandomSaltsOld :: BloomFilter
    , numRandomSalts :: TMVar Int
    , randomSaltGenerationLimiter :: TokenBucket
    }

type BloomFilter = TMVar (BloomFilter.MBloom RealWorld RandomSalt)

-- | Buckets fill up at a fixed rate (which can be tuned by the server
-- operator), and accessing a bucket removes one unit from it.
data Bucket = Bucket
    { tokenBucket :: TokenBucket
    , mkProofReq :: Maybe (RandomSalt -> ProofOfWorkRequirement)
    }

newRateLimiter :: IO RateLimiter
newRateLimiter = RateLimiter
    <$> (newTMVarIO =<< mkbuckets maxProofOfWork [])
    <*> mkBloomFilter
    <*> mkBloomFilter
    <*> mkBloomFilter
    <*> mkBloomFilter
    <*> newTMVarIO 0
    <*> newTokenBucket
  where
    -- The last bucket takes half of maxProofOfWork to access, and
    -- each earlier bucket halves that time, down to the first bucket,
    -- which needs no proof of work. This ensures that in the edge case
    -- where a client keeps getting bumped up to more and more expensive
    -- buckets, it doesn't need to do more than maxProofOfWork total work.
    mkbuckets (Seconds n) bs
        | n <= 0 = return bs
        | otherwise = do
            let s = Seconds (n `div` 2)
            let mkreq = mkProofOfWorkRequirement s
            b <- Bucket
                <$> newTokenBucket
                <*> pure mkreq
            case mkreq of
                Nothing -> return (b:bs)
                Just _ -> mkbuckets s (b:bs)

mkBloomFilter :: IO BloomFilter
mkBloomFilter = do
    b <- stToIO $ BloomFilter.new (BloomFilter.cheapHashes bloomhashes) bloomsize
    newTMVarIO b
  where
    -- Size the bloom filter to hold 1 million items, with a false
    -- positive rate of 1 in 100 thousand. This will use around 32 mb
    -- of memory.
    (bloomhashes, bloomsize) = suggestSizing bloomMaxSize (1/100000)

-- | Maximum number of RandomSalts that can be stored in a bloom filter
-- without the false positive rate getting bad.
bloomMaxSize :: Int
bloomMaxSize = 1000000

-- | Size of the bucket. This allows a burst of accesses after an idle
-- period, which is especially useful when retrieving keys that were
-- split into multiple chunks. However, setting this too high lets clients
-- cheaply store lots of data, so keep the objectSize in mind.
burstSize :: Word64
burstSize = 4 -- allow 128 kb of data to be stored/retrieved w/o proof of work

-- | Rate that the bucket is filled.
fillRate :: Word64
fillRate = 60000000 -- 1 token per minute
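
-- For illustration only (the numbers below are assumptions, not values
-- taken from this codebase): if maxProofOfWork were Seconds 600, mkbuckets
-- above would build a ladder of buckets costing roughly 0, ..., 75, 150,
-- and 300 seconds of proof of work, cheapest first. A client that
-- rateLimit (below) bumps through every bucket therefore does at most
-- 300 + 150 + 75 + ... < 600 seconds of work in total, which is the bound
-- described in the comment on mkbuckets.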

-- A request is tried against each bucket, in turn, that its proof of work
-- allows access to. If all accessible token buckets are empty, generate a
-- new ProofOfWorkRequirement for the client.
--
-- If all buckets are tried and are empty, we must be very overloaded.
-- In this case, the request is still processed, since the client has done
-- quite a lot of work.
rateLimit :: POWIdent p => RateLimiter -> Maybe ProofOfWork -> p -> Handler a -> Handler (POWGuarded a)
rateLimit ratelimiter mpow p a = do
    validsalt <- liftIO $ checkValidSalt ratelimiter mpow
    bs <- liftIO $ atomically $ readTMVar (buckets ratelimiter)
    if validsalt
        then go bs
        else assignWork ratelimiter bs
  where
    go [] = do
        liftIO $ hPutStrLn stderr "** warning: all token buckets are empty; possible DOS attack?"
        Result <$> a
    go (b:bs) = case mkProofReq b of
        Nothing -> checkbucket b bs
        Just mkreq -> case mpow of
            Nothing -> assignWork ratelimiter (b:bs)
            Just pow@(ProofOfWork _ salt) ->
                if isValidProofOfWork pow (mkreq salt) p
                    then checkbucket b bs
                    else assignWork ratelimiter (b:bs)
    checkbucket b bs = do
        allowed <- liftIO $ tokenBucketTryAlloc (tokenBucket b)
            burstSize fillRate 1
        if allowed
            then Result <$> a
            else go bs

checkValidSalt :: RateLimiter -> Maybe ProofOfWork -> IO Bool
checkValidSalt _ Nothing = return True
checkValidSalt rl (Just (ProofOfWork _ salt)) = do
    assigned <- iselem assignedRandomSalts
    oldassigned <- iselem assignedRandomSaltsOld
    used <- iselem usedRandomSalts
    oldused <- iselem usedRandomSaltsOld
    if assigned && not oldassigned && not used && not oldused
        then do
            withBloomFilter rl usedRandomSalts (`BloomFilter.insert` salt)
            return True
        else return False
  where
    iselem f = withBloomFilter rl f (BloomFilter.elem salt)

assignWork :: RateLimiter -> [Bucket] -> Handler (POWGuarded a)
assignWork ratelimiter bs = case mapMaybe mkProofReq bs of
    [] -> throwError err404
    (mkreq:_) -> liftIO $ do
        -- This prevents an attacker flooding requests that
        -- cause new random salts to be assigned, in order
        -- to fill up the bloom filters and cause salts assigned
        -- to other clients to be rejected.
        -- Since the bloom filters hold 1 million salts,
        -- the attacker would need to send requests for over 10
        -- hours to force a bloom filter rotation, so would not
        -- impact many users.
        tokenBucketWait (randomSaltGenerationLimiter ratelimiter)
            100 -- burst
            100000 -- refill 1 token per 100 milliseconds
        salt <- liftIO mkRandomSalt
        withBloomFilter ratelimiter assignedRandomSalts
            (`BloomFilter.insert` salt)
        needrot <- atomically $ do
            n <- takeTMVar (numRandomSalts ratelimiter)
            if n > bloomMaxSize `div` 2
                then return (Just n)
                else do
                    putTMVar (numRandomSalts ratelimiter) (n+1)
                    return Nothing
        handlerotation needrot
        return $ NeedProofOfWork $ mkreq salt
  where
    handlerotation Nothing = return ()
    handlerotation (Just n) = do
        hPutStrLn stderr $ "rotating bloom filters after processing " ++ show n ++ " requests"
        newassigned <- mkBloomFilter
        newused <- mkBloomFilter
        atomically $ do
            oldassigned <- takeTMVar (assignedRandomSalts ratelimiter)
            oldused <- takeTMVar (usedRandomSalts ratelimiter)
            -- The Old TMVars are always full, so swap their contents,
            -- discarding the previous generation of filters.
            _ <- swapTMVar (assignedRandomSaltsOld ratelimiter) oldassigned
            _ <- swapTMVar (usedRandomSaltsOld ratelimiter) oldused
            putTMVar (assignedRandomSalts ratelimiter) =<< takeTMVar newassigned
            putTMVar (usedRandomSalts ratelimiter) =<< takeTMVar newused
            putTMVar (numRandomSalts ratelimiter) 0

withBloomFilter
    :: RateLimiter
    -> (RateLimiter -> BloomFilter)
    -> (BloomFilter.MBloom RealWorld RandomSalt -> ST RealWorld a)
    -> IO a
withBloomFilter rl field a = do
    b <- atomically $ readTMVar (field rl)
    stToIO (a b)
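
{- A minimal usage sketch, not part of this module: a Servant handler on the
 - server side would typically thread the client's Maybe ProofOfWork and some
 - POWIdent value through rateLimit, roughly like this. getObject,
 - StorableObjectIdent, StorableObject, and retrieveObject are hypothetical
 - names used only for illustration; rateLimit, RateLimiter, ProofOfWork and
 - POWGuarded are the real ones.
 -
 -   getObject :: RateLimiter -> StorableObjectIdent -> Maybe ProofOfWork
 -       -> Handler (POWGuarded StorableObject)
 -   getObject ratelimiter i mpow = rateLimit ratelimiter mpow i $
 -       liftIO (retrieveObject i)
 -
 - When all the buckets a request can access are empty, the client would get
 - back NeedProofOfWork containing a requirement built from a fresh
 - RandomSalt, compute the proof, and retry the same request with it.
 -}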