From e336a4fdf3d55f01b8c2871ceb906544a493eeb7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 21 Apr 2017 12:16:38 -0400 Subject: use cereal for efficient serialization for wire format I tried both binary and cereal. For an ActivityMessage that takes 341 bytes in JSON and has a dataSize of 129, both used 165 bytes. Went with cereal since lazy bytestrings are not needed, and I might want to use https://hackage.haskell.org/package/safecopy later. (Perhaps I should be using protocol buffers or something to make it easier for non-haskell implementations? But that would complicate things a lot.) --- Json.hs | 13 ------------- Log.hs | 2 +- Serialization.hs | 15 +++++++++++++++ TODO | 6 ++---- Types.hs | 20 +++++++++++++++++--- Val.hs | 4 +++- debug-me.cabal | 9 ++++++--- protocol.txt | 4 +++- 8 files changed, 47 insertions(+), 26 deletions(-) delete mode 100644 Json.hs create mode 100644 Serialization.hs diff --git a/Json.hs b/Json.hs deleted file mode 100644 index 4486d21..0000000 --- a/Json.hs +++ /dev/null @@ -1,13 +0,0 @@ -module Json ( - module Data.Aeson, - Generic, - sumOptions -) where - -import GHC.Generics (Generic) -import Data.Aeson -import qualified Data.Aeson.Types as Aeson - --- | Nicer JSON encoding for sum types. -sumOptions :: Aeson.Options -sumOptions = defaultOptions { Aeson.sumEncoding = Aeson.ObjectWithSingleField } diff --git a/Log.hs b/Log.hs index 2993ad8..05cbdc3 100644 --- a/Log.hs +++ b/Log.hs @@ -5,7 +5,7 @@ module Log where import Types import Hash import Memory -import Json +import Serialization import Data.Char import Data.Either diff --git a/Serialization.hs b/Serialization.hs new file mode 100644 index 0000000..6940550 --- /dev/null +++ b/Serialization.hs @@ -0,0 +1,15 @@ +module Serialization ( + module Data.Aeson, + Serialize, + Generic, + sumOptions +) where + +import GHC.Generics (Generic) +import Data.Serialize +import Data.Aeson +import qualified Data.Aeson.Types as Aeson + +-- | Nicer JSON encoding for sum types. 
+sumOptions :: Aeson.Options +sumOptions = defaultOptions { Aeson.sumEncoding = Aeson.ObjectWithSingleField } diff --git a/TODO b/TODO index 601eb3f..c9e93ec 100644 --- a/TODO +++ b/TODO @@ -1,7 +1,5 @@ -* JSON serialization of signatures uses base64, and - does not compress well. gzip -9 of just the base64 signature - is 113 bytes, double the 64 byte actual size. Find a more efficient - encoding for bytestrings in JSON. +* Use protobuf for serialization, to make non-haskell implementations + easier? * Leave the prevMessage out of Activity serialization to save BW. Do include it in the data that gets signed, so it can be recovered by trying each likely (recently seen) Activity as the prevMessage, and diff --git a/Types.hs b/Types.hs index e20228d..699fa59 100644 --- a/Types.hs +++ b/Types.hs @@ -2,8 +2,9 @@ {- | Main types for debug-me - - - Note that changing types in ways that change the JSON serialization - - changes debug-me's wire format. + - Note that changing types in ways that change the cereal serialization + - changes debug-me's wire format. Changing types in ways that change the + - aeson serialization changes debug-me's log format. -} module Types ( @@ -13,7 +14,7 @@ module Types ( import Val import Memory -import Json +import Serialization -- | Things that the developer sees. 
data Seen = Seen @@ -135,43 +136,56 @@ newtype GpgSig = GpgSig Val instance DataSize GpgSig where dataSize (GpgSig s) = dataSize s +instance Serialize Seen instance ToJSON Seen instance FromJSON Seen +instance Serialize Entered instance ToJSON Entered instance FromJSON Entered +instance Serialize (Activity Seen) instance ToJSON (Activity Seen) instance FromJSON (Activity Seen) +instance Serialize (Activity Entered) instance ToJSON (Activity Entered) instance FromJSON (Activity Entered) +instance Serialize Control instance ToJSON Control instance FromJSON Control +instance Serialize Hash instance ToJSON Hash instance FromJSON Hash +instance Serialize HashMethod instance ToJSON HashMethod instance FromJSON HashMethod +instance Serialize PublicKey instance ToJSON PublicKey instance FromJSON PublicKey +instance Serialize GpgSig instance ToJSON GpgSig instance FromJSON GpgSig +instance Serialize (Message Seen) instance ToJSON (Message Seen) where toJSON = genericToJSON sumOptions toEncoding = genericToEncoding sumOptions instance FromJSON (Message Seen) where parseJSON = genericParseJSON sumOptions +instance Serialize (Message Entered) instance ToJSON (Message Entered) where toJSON = genericToJSON sumOptions toEncoding = genericToEncoding sumOptions instance FromJSON (Message Entered) where parseJSON = genericParseJSON sumOptions +instance Serialize Signature instance ToJSON Signature where toJSON = genericToJSON sumOptions toEncoding = genericToEncoding sumOptions instance FromJSON Signature where parseJSON = genericParseJSON sumOptions +instance Serialize ControlAction instance ToJSON ControlAction where toJSON = genericToJSON sumOptions toEncoding = genericToEncoding sumOptions diff --git a/Val.hs b/Val.hs index 40e718b..80ab8ed 100644 --- a/Val.hs +++ b/Val.hs @@ -3,9 +3,9 @@ module Val where import Memory +import Serialization import GHC.Generics (Generic) -import Data.Aeson import Data.Aeson.Types import qualified Codec.Binary.Base64 as B64 import qualified 
Data.Text as T @@ -19,6 +19,8 @@ newtype Val = Val { val :: B.ByteString } instance DataSize Val where dataSize (Val b) = fromIntegral (B.length b) +instance Serialize Val + -- | JSON instances for Val, using base64 encoding when the value -- is not utf-8 encoded, and otherwise using a more efficient encoding. instance ToJSON Val where diff --git a/debug-me.cabal b/debug-me.cabal index 05ab1e9..5042ee3 100644 --- a/debug-me.cabal +++ b/debug-me.cabal @@ -22,7 +22,7 @@ Executable debug-me Main-Is: debug-me.hs GHC-Options: -threaded -Wall -fno-warn-tabs -O2 Build-Depends: - base (>= 4.5 && < 5.0) + base (>= 4.9 && < 5.0) , bytestring == 0.10.* , cryptonite (>= 0.20) , unix (>= 2.7) @@ -33,24 +33,27 @@ Executable debug-me , terminal-size (>= 0.3) , aeson (>= 0.11 && < 1.1) , sandi (>= 0.4) - , text (>= 1.2) + , text (>= 1.2.2) + , cereal (>= 0.5) , optparse-applicative (>= 0.12) , graphviz (== 2999.18.*) , time (>= 1.6) , unbounded-delays (>= 0.1) , memory (>= 0.13) + --, websockets (>= 0.9) + --, wai-websockets (>= 3.0) Other-Modules: CmdLine Crypto Graphviz Hash - Json Log Memory Pty Replay Session Types + Serialization Val source-repository head diff --git a/protocol.txt b/protocol.txt index c71a84f..687c085 100644 --- a/protocol.txt +++ b/protocol.txt @@ -3,7 +3,9 @@ the two participants, known as the user and the developer. (The exact composition of the JSON objects is not described here; see Types.hs for the data types that JSON serialization instances are derived -from.) +from. Also, debug-me uses a binary format instead of sending JSON +over the wire. The wire format is currently implemented using the +Haskell cereal library, and is not specified.) The Activity type is the main message type. The user sends Activity Seen messages, and the developer responds with Activity Entered. -- cgit v1.2.3