From 5c542bb09e51eeb407a59f5bd4a2c6c460863446 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 27 Apr 2017 12:01:20 -0400 Subject: switch wire message seralization to use protocol buffers This way it's not tied to details of the haskell binary library, and implementations in other languages should be fairly simple to do. The haskell protobuf library was used because it does not need extra tooling or build steps. So I didn't write a .proto file, but one could fairly easily be written by following ProtocolBuffers.hs and translating it. ProtocolBuffers.hs is *extremely* repetative and tedious code. Surely there must be a way to not need to write all of that? Oh well, I wrote it.. Sizes of serialized messages: ">>> debug-me session started": 121 sending a single key press: 169 This seems equally as efficient as the binary serialization was; that was 165 bytes before elapsedTime was added. This commit was sponsored by Ethan Aubin. --- Crypto.hs | 2 +- JSON.hs | 13 +++ Log.hs | 2 +- ProtocolBuffers.hs | 275 +++++++++++++++++++++++++++++++++++++++++++++++++++++ Serialization.hs | 15 --- SessionID.hs | 2 +- TODO | 2 - Types.hs | 24 +---- Val.hs | 4 +- WebSockets.hs | 17 ++-- debug-me.cabal | 6 +- protocol.txt | 18 ++-- 12 files changed, 320 insertions(+), 60 deletions(-) create mode 100644 JSON.hs create mode 100644 ProtocolBuffers.hs delete mode 100644 Serialization.hs diff --git a/Crypto.hs b/Crypto.hs index b23c8de..1c898e7 100644 --- a/Crypto.hs +++ b/Crypto.hs @@ -5,7 +5,7 @@ module Crypto where import Val import Hash import Types -import Serialization +import JSON import qualified Crypto.PubKey.Ed25519 as Ed25519 import Crypto.Error diff --git a/JSON.hs b/JSON.hs new file mode 100644 index 0000000..84ba9d1 --- /dev/null +++ b/JSON.hs @@ -0,0 +1,13 @@ +module JSON ( + module Data.Aeson, + Generic, + sumOptions +) where + +import GHC.Generics (Generic) +import Data.Aeson +import qualified Data.Aeson.Types as Aeson + +-- | Nicer JSON encoding for sum types. +sumOptions :: Aeson.Options +sumOptions = defaultOptions { Aeson.sumEncoding = Aeson.ObjectWithSingleField } diff --git a/Log.hs b/Log.hs index ffb4eb7..ecf0614 100644 --- a/Log.hs +++ b/Log.hs @@ -5,7 +5,7 @@ module Log where import Types import Hash import Memory -import Serialization +import JSON import Data.Char import Data.Time.Clock.POSIX diff --git a/ProtocolBuffers.hs b/ProtocolBuffers.hs new file mode 100644 index 0000000..6d02096 --- /dev/null +++ b/ProtocolBuffers.hs @@ -0,0 +1,275 @@ +{-# LANGUAGE DeriveGeneric, DataKinds, MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleContexts, UndecidableInstances #-} +{-# LANGUAGE OverloadedStrings #-} + +{- | Protocol buffers serialization for the debug-me wire protocol. + - + - The message types in here define protocol buffers, so should be changed + - with care. These messages correspond to the main data types in the Types + - module. + -} + +module ProtocolBuffers where + +import qualified Types as T +import Val + +import Data.ProtocolBuffers +import GHC.Generics (Generic) +import qualified Data.ByteString as B +import Data.Monoid +import Prelude + +data SeenP = SeenP + { seenDataP :: Required 1 (Value B.ByteString) + } + deriving (Generic) + +data EnteredP = EnteredP + { enteredDataP :: Required 2 (Value B.ByteString) + , echoDataP :: Required 3 (Value B.ByteString) + } + deriving (Generic) + +data MessageP a + = ActivityMessageP + { activityMessageP :: Required 4 (Message (ActivityP a)) } + | ControlMessageP + { controlMessageP :: Required 5 (Message ControlP) } + deriving (Generic) + +data ActivityP a = ActivityP + { activityP :: Required 6 (Message a) + , prevAtivityP :: Optional 7 (Message HashP) + , elapsedTimeP :: Required 8 (Message ElapsedTimeP) + , activitySignatureP :: Required 9 (Message SignatureP) + } + deriving (Generic) + +data ControlP = ControlP + { controlP :: Required 10 (Message ControlActionP) + , controlSignatureP ::Required 11 (Message SignatureP) + } + deriving (Generic) + +data ControlActionP + = RejectedP + { rejectedP :: Required 12 (Message (ActivityP EnteredP)) } + | SessionKeyP + { sessionKeyP :: Required 13 (Message (PerhapsSignedP PublicKeyP)) } + | SessionKeyAcceptedP + { sessionKeyAcceptedP :: Required 14 (Message PublicKeyP) } + | SessionKeyRejectedP + { sessionKeyRejectedP :: Required 15 (Message PublicKeyP) } + deriving (Generic) + +data HashP = HashP + { hashMethodP :: Required 16 (Value B.ByteString) + , hashValueP :: Required 17 (Value B.ByteString) + } + deriving (Generic) + +data SignatureP + = Ed25519SignatureP + { ed25519SignatureP :: Required 18 (Value B.ByteString) } + | OtherSignatureP + { otherSignatureP :: Required 19 (Value B.ByteString) } + deriving (Generic) + +data PublicKeyP = PublicKeyP + { mkPublicKeyP :: Required 20 (Value B.ByteString) } + deriving (Generic) + +data PerhapsSignedP a + = GpgSignedP + { gpgSignedValP :: Required 21 (Message a) + , gpgSigP :: Required 22 (Message GpgSigP) + } + | UnSignedP + { mkUnSignedP :: Required 23 (Message a ) + } + deriving (Generic) + +data GpgSigP = GpgSigP + { mkGpgSigP :: Required 24 (Value B.ByteString) } + deriving (Generic) + +data ElapsedTimeP = ElapsedTimeP + { mkElapsedTimeP :: Required 25 (Value Double) } + deriving (Generic) + +data AnyMessageP + = UserP { mkUserP :: Required 26 (Message (MessageP SeenP)) } + | DeveloperP { mkDeveloperP :: Required 27 (Message (MessageP EnteredP)) } + deriving (Generic) + +-- | Conversion between protocol buffer messages and debug-me's main Types. +class ProtocolBuffer p t where + toProtocolBuffer :: t -> p + fromProtocolBuffer :: p -> t + +instance ProtocolBuffer SeenP T.Seen where + toProtocolBuffer t = SeenP + { seenDataP = putField $ val $ T.seenData t + } + fromProtocolBuffer p =T.Seen + { T.seenData = Val $ getField $ seenDataP p + } + +instance ProtocolBuffer EnteredP T.Entered where + toProtocolBuffer t = EnteredP + { enteredDataP = putField $ val $ T.enteredData t + , echoDataP = putField $ val $ T.echoData t + } + fromProtocolBuffer p =T.Entered + { T.enteredData = Val $ getField $ enteredDataP p + , T.echoData = Val $ getField $ echoDataP p + } + +instance ProtocolBuffer (ActivityP p) (T.Activity t) => ProtocolBuffer (MessageP p) (T.Message t) where + toProtocolBuffer (T.ActivityMessage a) = + ActivityMessageP (putField (toProtocolBuffer a)) + toProtocolBuffer (T.ControlMessage c) = + ControlMessageP (putField (toProtocolBuffer c)) + fromProtocolBuffer p@(ActivityMessageP {}) = + T.ActivityMessage $ fromProtocolBuffer $ + getField $ activityMessageP p + fromProtocolBuffer p@(ControlMessageP {}) = + T.ControlMessage $ fromProtocolBuffer $ + getField $ controlMessageP p + +instance ProtocolBuffer p t => ProtocolBuffer (ActivityP p) (T.Activity t) where + toProtocolBuffer t = ActivityP + { activityP = putField $ toProtocolBuffer $ T.activity t + , prevAtivityP = putField $ fmap toProtocolBuffer $ T.prevActivity t + , elapsedTimeP = putField $ toProtocolBuffer $ T.elapsedTime t + , activitySignatureP = putField $ toProtocolBuffer $ T.activitySignature t + } + fromProtocolBuffer p = T.Activity + { T.activity = fromProtocolBuffer $ getField $ activityP p + , T.prevActivity = fmap fromProtocolBuffer $ getField $ prevAtivityP p + , T.elapsedTime = fromProtocolBuffer $ getField $ elapsedTimeP p + , T.activitySignature = fromProtocolBuffer $ getField $ activitySignatureP p + } + +instance ProtocolBuffer ControlP T.Control where + toProtocolBuffer t = ControlP + { controlP = putField $ toProtocolBuffer $ T.control t + , controlSignatureP = putField $ toProtocolBuffer $ T.controlSignature t + } + fromProtocolBuffer p = T.Control + { T.control = fromProtocolBuffer $ getField $ controlP p + , T.controlSignature = fromProtocolBuffer $ getField $ controlSignatureP p + } + +instance ProtocolBuffer ControlActionP T.ControlAction where + toProtocolBuffer (T.Rejected t) = RejectedP + { rejectedP = putField $ toProtocolBuffer t } + toProtocolBuffer (T.SessionKey t) = SessionKeyP + { sessionKeyP = putField $ toProtocolBuffer t } + toProtocolBuffer (T.SessionKeyAccepted t) = SessionKeyAcceptedP + { sessionKeyAcceptedP = putField $ toProtocolBuffer t } + toProtocolBuffer (T.SessionKeyRejected t) = SessionKeyRejectedP + { sessionKeyRejectedP = putField $ toProtocolBuffer t } + fromProtocolBuffer p@(RejectedP {}) = T.Rejected $ + fromProtocolBuffer $ getField $ rejectedP p + fromProtocolBuffer p@(SessionKeyP {}) = T.SessionKey $ + fromProtocolBuffer $ getField $ sessionKeyP p + fromProtocolBuffer p@(SessionKeyAcceptedP {}) = T.SessionKeyAccepted $ + fromProtocolBuffer $ getField $ sessionKeyAcceptedP p + fromProtocolBuffer p@(SessionKeyRejectedP {}) = T.SessionKeyRejected $ + fromProtocolBuffer $ getField $ sessionKeyRejectedP p + +instance ProtocolBuffer HashP T.Hash where + toProtocolBuffer t = HashP + { hashMethodP = putField $ case T.hashMethod t of + T.SHA256 -> "2" + T.SHA3 -> "3" + , hashValueP = putField $ val $ T.hashValue t + } + fromProtocolBuffer p = T.Hash + { T.hashMethod = case getField (hashMethodP p) of + "2" -> T.SHA256 + "3" -> T.SHA3 + _ -> T.SHA256 + , T.hashValue = Val $ getField $ hashValueP p + } + +instance ProtocolBuffer SignatureP T.Signature where + toProtocolBuffer (T.Ed25519Signature t) = Ed25519SignatureP + { ed25519SignatureP = putField $ val t } + toProtocolBuffer (T.OtherSignature t) = OtherSignatureP + { otherSignatureP = putField $ val t } + fromProtocolBuffer p@(Ed25519SignatureP {}) = T.Ed25519Signature $ + Val $ getField $ ed25519SignatureP p + fromProtocolBuffer p@(OtherSignatureP {}) = T.OtherSignature $ + Val $ getField $ otherSignatureP p + +instance ProtocolBuffer PublicKeyP T.PublicKey where + toProtocolBuffer (T.PublicKey t) = PublicKeyP + { mkPublicKeyP = putField (val t) } + fromProtocolBuffer p = T.PublicKey $ Val $ getField $ mkPublicKeyP p + +instance ProtocolBuffer p t => ProtocolBuffer (PerhapsSignedP p) (T.PerhapsSigned t) where + toProtocolBuffer (T.GpgSigned tv tg) = GpgSignedP + { gpgSignedValP = putField $ toProtocolBuffer tv + , gpgSigP = putField $ toProtocolBuffer tg + } + toProtocolBuffer (T.UnSigned tv) = UnSignedP + { mkUnSignedP = putField $ toProtocolBuffer tv + } + fromProtocolBuffer p@(GpgSignedP {}) = T.GpgSigned + (fromProtocolBuffer $ getField $ gpgSignedValP p) + (fromProtocolBuffer $ getField $ gpgSigP p) + fromProtocolBuffer p@(UnSignedP {}) = T.UnSigned + (fromProtocolBuffer $ getField $ mkUnSignedP p) + +instance ProtocolBuffer GpgSigP T.GpgSig where + toProtocolBuffer (T.GpgSig t) = GpgSigP + { mkGpgSigP = putField ( val t) } + fromProtocolBuffer p = T.GpgSig $ Val $ getField $ mkGpgSigP p + +instance ProtocolBuffer ElapsedTimeP T.ElapsedTime where + toProtocolBuffer (T.ElapsedTime t) = ElapsedTimeP + { mkElapsedTimeP = putField t } + fromProtocolBuffer p = T.ElapsedTime $ getField $ mkElapsedTimeP p + +instance ProtocolBuffer AnyMessageP T.AnyMessage where + toProtocolBuffer (T.User t) = UserP + { mkUserP = putField $ toProtocolBuffer t } + toProtocolBuffer (T.Developer t) = DeveloperP + { mkDeveloperP = putField $ toProtocolBuffer t } + fromProtocolBuffer p@(UserP {}) = T.User $ + fromProtocolBuffer $ getField $ mkUserP p + fromProtocolBuffer p@(DeveloperP {}) = T.Developer $ + fromProtocolBuffer $ getField $ mkDeveloperP p + +instance Encode SeenP +instance Decode SeenP +instance Encode EnteredP +instance Decode EnteredP +instance Encode ControlP +instance Decode ControlP +instance Encode ControlActionP +instance Decode ControlActionP +instance Encode HashP +instance Decode HashP +instance Encode SignatureP +instance Decode SignatureP +instance Encode PublicKeyP +instance Decode PublicKeyP +instance Encode GpgSigP +instance Decode GpgSigP +instance Encode ElapsedTimeP +instance Decode ElapsedTimeP +instance Encode AnyMessageP +instance Decode AnyMessageP +instance Encode a => Encode (MessageP a) +-- This is why UndecidableInstances is needed. The need +-- for a Monoid instance is an implementation detail of +-- Data.ProtocolBuffers. +instance (Monoid (Message a), Generic a, Decode a) => Decode (MessageP a) +instance Encode a => Encode (ActivityP a) +instance (Monoid (Message a), Generic a, Decode a) => Decode (ActivityP a) +instance Encode a => Encode (PerhapsSignedP a) +instance (Monoid (Message a), Generic a, Decode a) => Decode (PerhapsSignedP a) diff --git a/Serialization.hs b/Serialization.hs deleted file mode 100644 index bba2a52..0000000 --- a/Serialization.hs +++ /dev/null @@ -1,15 +0,0 @@ -module Serialization ( - module Data.Aeson, - Binary, - Generic, - sumOptions -) where - -import GHC.Generics (Generic) -import Data.Binary -import Data.Aeson -import qualified Data.Aeson.Types as Aeson - --- | Nicer JSON encoding for sum types. -sumOptions :: Aeson.Options -sumOptions = defaultOptions { Aeson.sumEncoding = Aeson.ObjectWithSingleField } diff --git a/SessionID.hs b/SessionID.hs index 449f58c..c526849 100644 --- a/SessionID.hs +++ b/SessionID.hs @@ -8,7 +8,7 @@ module SessionID ( sessionIDUrl, ) where -import Serialization +import JSON import System.FilePath import System.IO diff --git a/TODO b/TODO index a3fdee1..5702a9e 100644 --- a/TODO +++ b/TODO @@ -21,8 +21,6 @@ multiple developers, as each time a developer gets an Activity Seen, they can update their state to use the Activity Entered that it points to. -* Use protobuf for serialization, to make non-haskell implementations - easier? * Leave the prevMessage out of Activity serialization to save BW. Do include it in the data that gets signed, so it can be recovered by trying each likely (recently seen) Activity as the prevMessage, and diff --git a/Types.hs b/Types.hs index 678455c..18b197e 100644 --- a/Types.hs +++ b/Types.hs @@ -2,9 +2,9 @@ {- | Main types for debug-me - - - Note that changing types in ways that change the Binary serialization - - changes debug-me's wire format. Changing types in ways that change the - - aeson serialization changes debug-me's log format. + - Note that changing types in ways that change the JSON serialization + - changes debug-me's log format, and in some cases also changes the wire + - format. -} module Types ( @@ -14,7 +14,7 @@ module Types ( import Val import Memory -import Serialization +import JSON import Data.Time.Clock.POSIX @@ -169,7 +169,6 @@ data AnyMessage | Developer (Message Entered) deriving (Show, Generic) -instance Binary ElapsedTime instance ToJSON ElapsedTime instance FromJSON ElapsedTime @@ -177,70 +176,55 @@ instance DataSize AnyMessage where dataSize (User a) = dataSize a dataSize (Developer a) = dataSize a -instance Binary AnyMessage instance ToJSON AnyMessage where toJSON = genericToJSON sumOptions toEncoding = genericToEncoding sumOptions instance FromJSON AnyMessage where parseJSON = genericParseJSON sumOptions -instance Binary Seen instance ToJSON Seen instance FromJSON Seen -instance Binary Entered instance ToJSON Entered instance FromJSON Entered -instance Binary (Activity Seen) instance ToJSON (Activity Seen) instance FromJSON (Activity Seen) -instance Binary (Activity Entered) instance ToJSON (Activity Entered) instance FromJSON (Activity Entered) -instance Binary Control instance ToJSON Control instance FromJSON Control -instance Binary Hash instance ToJSON Hash instance FromJSON Hash -instance Binary HashMethod instance ToJSON HashMethod instance FromJSON HashMethod -instance Binary PublicKey instance ToJSON PublicKey instance FromJSON PublicKey -instance Binary GpgSig instance ToJSON GpgSig instance FromJSON GpgSig -instance Binary (Message Seen) instance ToJSON (Message Seen) where toJSON = genericToJSON sumOptions toEncoding = genericToEncoding sumOptions instance FromJSON (Message Seen) where parseJSON = genericParseJSON sumOptions -instance Binary (Message Entered) instance ToJSON (Message Entered) where toJSON = genericToJSON sumOptions toEncoding = genericToEncoding sumOptions instance FromJSON (Message Entered) where parseJSON = genericParseJSON sumOptions -instance Binary Signature instance ToJSON Signature where toJSON = genericToJSON sumOptions toEncoding = genericToEncoding sumOptions instance FromJSON Signature where parseJSON = genericParseJSON sumOptions -instance Binary ControlAction instance ToJSON ControlAction where toJSON = genericToJSON sumOptions toEncoding = genericToEncoding sumOptions instance FromJSON ControlAction where parseJSON = genericParseJSON sumOptions -instance Binary (PerhapsSigned PublicKey) instance ToJSON (PerhapsSigned PublicKey) where toJSON = genericToJSON sumOptions toEncoding = genericToEncoding sumOptions diff --git a/Val.hs b/Val.hs index 3493bcd..d307dde 100644 --- a/Val.hs +++ b/Val.hs @@ -3,7 +3,7 @@ module Val where import Memory -import Serialization +import JSON import GHC.Generics (Generic) import Data.Aeson.Types @@ -19,8 +19,6 @@ newtype Val = Val { val :: B.ByteString } instance DataSize Val where dataSize (Val b) = fromIntegral (B.length b) -instance Binary Val - -- | JSON instances for Val, using base64 encoding when the value -- is not utf-8 encoded, and otherwise using a more efficient encoding. instance ToJSON Val where diff --git a/WebSockets.hs b/WebSockets.hs index 2fa9e35..f159271 100644 --- a/WebSockets.hs +++ b/WebSockets.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE OverloadedStrings, DeriveGeneric, GeneralizedNewtypeDeriving, FlexibleContexts, FlexibleInstances #-} +{-# LANGUAGE OverloadedStrings, DeriveGeneric, GeneralizedNewtypeDeriving #-} +{-# LANGUAGE FlexibleContexts, FlexibleInstances, ScopedTypeVariables #-} module WebSockets ( connectionOptions, @@ -16,6 +17,7 @@ module WebSockets ( import Types import SessionID +import ProtocolBuffers import Network.WebSockets hiding (Message) import Control.Concurrent.STM @@ -24,8 +26,9 @@ import Control.Concurrent.Async import Control.Exception import GHC.Generics (Generic) import Data.Aeson (FromJSON, ToJSON) +import Data.ProtocolBuffers import qualified Data.Aeson -import qualified Data.Binary +import qualified Data.Serialize import qualified Data.Text as T import qualified Data.ByteString.Lazy as L import Data.List @@ -144,7 +147,9 @@ instance WebSocketsData WireProtocol where toLazyByteString (Version v) = "V" <> Data.Aeson.encode v toLazyByteString (SelectMode _ m) = "M" <> Data.Aeson.encode m toLazyByteString (Ready _ sid) = "R" <> Data.Aeson.encode sid - toLazyByteString (AnyMessage msg) = "L" <> Data.Binary.encode msg + toLazyByteString (AnyMessage msg) = "L" <> + let pmsg = toProtocolBuffer msg :: AnyMessageP + in Data.Serialize.runPutLazy (encodeMessage pmsg) toLazyByteString Done = "D" toLazyByteString (WireProtocolError s) = "E" <> Data.Aeson.encode s fromLazyByteString b = case L.splitAt 1 b of @@ -157,9 +162,9 @@ instance WebSocketsData WireProtocol where ("R", sid) -> maybe (WireProtocolError "invalid JSON in SessionID") (Ready ServerSends) (Data.Aeson.decode sid) - ("L", l) -> case Data.Binary.decodeOrFail l of - Left (_, _, err) -> WireProtocolError $ "Binary decode error: " ++ err - Right (_, _, msg) -> AnyMessage msg + ("L", l) -> case Data.Serialize.runGetLazy decodeMessage l of + Left err -> WireProtocolError $ "Protocol buffers decode error: " ++ err + Right (pmsg :: AnyMessageP) -> AnyMessage (fromProtocolBuffer pmsg) ("D", "") -> Done ("E", s) -> maybe (WireProtocolError "invalid JSON in WireProtocolError") WireProtocolError diff --git a/debug-me.cabal b/debug-me.cabal index 8db9d4d..70ea2ac 100644 --- a/debug-me.cabal +++ b/debug-me.cabal @@ -36,7 +36,6 @@ Executable debug-me , aeson (>= 0.11 && < 1.1) , sandi (>= 0.4) , text (>= 1.2.2) - , binary (>= 0.8) , optparse-applicative (>= 0.12) , graphviz (== 2999.18.*) , time (>= 1.6) @@ -51,21 +50,24 @@ Executable debug-me , websockets (>= 0.11.1) , wai-websockets (>= 3.0) , uuid (>= 1.3) + , protobuf + , cereal Other-Modules: CmdLine Crypto Graphviz Hash + JSON Log Memory Pty + ProtocolBuffers Replay Role.Developer Role.Downloader Role.User Role.Watcher Session - Serialization Server SessionID Types diff --git a/protocol.txt b/protocol.txt index 5bf0f7e..25f239d 100644 --- a/protocol.txt +++ b/protocol.txt @@ -1,12 +1,12 @@ -The debug-me protocol is a series of JSON objects, exchanged between -the two participants, known as the user and the developer. - -(The exact composition of the JSON objects is not described here; see -Types.hs for the data types that JSON serialization instances are derived -from. Also, debug-me uses a binary format instead of sending JSON -over the wire. The wire format is currently implemented using the -Haskell cereal library, and is not specified. There is also a simple -framing protocol used for communicating over websockets; see WebSockets.hs) +The debug-me protocol is a series of messages, exchanged between +the two participants, known as the user and the developer. + +The messages are serialized as JSON in debug-me log files, and protocol +buffers are used when sending the messages over the wire. We won't go into +the full details here. See Types.hs for the data types that JSON +serialization instances are derived from, and ProocolBuffers.hs for the +protocol buffers format. There is also a simple framing protocol used for +communicating over websockets; see WebSockets.hs. The Activity type is the main message type. The user sends Activity Seen messages, and the developer responds with Activity Entered. -- cgit v1.2.3