Initial commit.

author: tony <tsyrogit@users.noreply.github.com> 2015-01-03 00:16:04 +0000
committer: tony <tsyrogit@users.noreply.github.com> 2015-01-03 00:16:04 +0000
commit: 38a981df53d53cc96ef47e15d62b7c9f0151315f (patch)
tree: 7059c14f2e43d7c5d3cbb48ce5ec4004c6a752ca /dict-generate.cpp
parent: 572ce633e134e53bd6a72220613feb82a5ef0a16 (diff)
download: zxcvbn-c-38a981df53d53cc96ef47e15d62b7c9f0151315f.tar.gz
1 files changed, 1613 insertions, 0 deletions
diff --git a/dict-generate.cpp b/dict-generate.cpp
new file mode 100644
index 0000000..cd2fb05
--- /dev/null
+++ b/dict-generate.cpp
@@ -0,0 +1,1613 @@
+/**********************************************************************************
+ * Program to generate the dictionary for the C implementation of the zxcvbn password estimator.
+ * Copyright (c) 2015, Tony Evans
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list
+ *    of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this
+ *    list of conditions and the following disclaimer in the documentation and/or other
+ *    materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors may be
+ *    used to endorse or promote products derived from this software without specific
+ *    prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * 
+ **********************************************************************************/
+ 
+#include <iostream>
+#include <string>
+#include <fstream>
+#include <list>
+#include <set>
+#include <vector>
+#include <map>
+#include <memory>
+#include <limits>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+using namespace std;
+
+class Node;
+typedef std::shared_ptr<Node> NodeSPtr;
+typedef std::weak_ptr<Node> NodeWPtr;
+typedef std::map<char, NodeSPtr> NodeMap_t;
+
+typedef unsigned int Check_t;
+
+/**********************************************************************************
+ * Class to perform CRC checksum calculation.
+ */
+class TrieCheck
+{
+public:
+    typedef uint64_t Check_t;
+    static const Check_t CHK_INIT = 0xffffffffffffffff;
+    TrieCheck()          { Init(); }
+    void Init()          { mCrc = CHK_INIT; }
+    operator Check_t() const { return Result(); }
+    Check_t  Result()  const { return mCrc; }
+    bool operator ! () const { return mCrc == CHK_INIT; }
+    void operator () (const void *, unsigned int);
+protected:
+    Check_t mCrc;
+};
+
+/**********************************************************************************
+ * Class to hold a node within the trie
+ */
+class Node
+{
+public:
+    Node();
+    Node(const Node &);
+    ~Node();
+    Node & operator = (const Node &);
+    //bool operator == (const Node & r) const { return !IsEqual(r); }
+    //bool operator != (const Node & r) const { return !IsEqual(r); }
+    void          SetEnd()           { mEnd = true; }
+    bool          IsEnd() const      { return mEnd; }
+    int           Height() const     { return mHeight; }
+    
+    // Scan the trie and count nodes
+    int           NodeCount()        { ClearCounted() ; return CountNodes(); }
+    
+    
+    int           CalcAddress()      { int a=0; ClearCounted(); a=CalcAddr(a, true); return CalcAddr(a, false); }
+    Node         *GetParent()        { return mParent; }
+    unsigned int  GetAddr() const    { return mAddr; }
+    NodeMap_t::iterator ChildBegin() { return mChild.begin(); }
+    NodeMap_t::iterator ChildEnd()   { return mChild.end(); }
+    int           GetNumEnds() const { return mEndings; }
+    NodeSPtr      FindChild(char);
+    std::string   GetChildChars();
+
+    TrieCheck::Check_t CalcCheck();
+    int         CalcEndings();
+    int         CalcHeight();
+    NodeSPtr    AddChild(char);
+    void        ChangeChild(NodeSPtr &, NodeSPtr &);
+//    bool        IsEqual(const Node &) const;
+    void        ClearCounted();
+    void        SetCounted()    { mCounted = true; }
+    bool        IsCounted() const { return mCounted; }
+protected:
+    int      CountNodes();
+    int      CalcAddr(int, bool);
+
+    NodeMap_t    mChild;
+    Node        *mParent;
+    int          mEndings;
+    int          mHeight;
+    unsigned int mAddr;
+    TrieCheck    mCheck;
+    bool         mEnd;
+    bool         mCounted;
+};
+
+/**********************************************************************************
+ * Static table used for the crc implementation.
+ */
+static const TrieCheck::Check_t CrcTable[16] =
+{
+    0x0000000000000000, 0x7d08ff3b88be6f81, 0xfa11fe77117cdf02, 0x8719014c99c2b083,
+    0xdf7adabd7a6e2d6f, 0xa2722586f2d042ee, 0x256b24ca6b12f26d, 0x5863dbf1e3ac9dec,
+    0x95ac9329ac4bc9b5, 0xe8a46c1224f5a634, 0x6fbd6d5ebd3716b7, 0x12b5926535897936,
+    0x4ad64994d625e4da, 0x37deb6af5e9b8b5b, 0xb0c7b7e3c7593bd8, 0xcdcf48d84fe75459
+};
+
+// Update the crc value with new data.
+void TrieCheck::operator () (const void *v, unsigned int Len)
+{
+    Check_t Crc = mCrc;
+    const unsigned char *Data = reinterpret_cast<const unsigned char *>(v);
+    while(Len--)
+    {
+        Crc = CrcTable[(Crc ^ (*Data >> 0)) & 0x0f] ^ (Crc >> 4);
+        Crc = CrcTable[(Crc ^ (*Data >> 4)) & 0x0f] ^ (Crc >> 4);
+        ++Data;
+    }
+    mCrc = Crc;
+}
+
+Node::Node()
+{
+    mEndings = -1;
+    mHeight = -1;
+    mEnd    = false;
+    mParent = 0;
+}
+
+Node::Node(const Node &r)
+{
+    *this = r;
+}
+
+Node::~Node()
+{
+
+}
+
+Node &Node::operator = (const Node & r)
+{
+    mChild   = r.mChild;
+    mParent  = r.mParent;
+    mEndings = r.mEndings;
+    mHeight  = r.mHeight;
+    mCheck   = r.mCheck;
+    mEnd     = r.mEnd;
+    return *this;
+}
+
+
+/**********************************************************************************
+ * Generate a checksum for the current node. Value also depends of the
+ * checksum of any child nodes
+ */
+TrieCheck::Check_t Node::CalcCheck()
+{
+    if (!mCheck)
+    {
+        // Not done this node before
+        char c;
+        NodeMap_t::iterator It;
+        mCheck.Init();
+        // Include number of children
+        c = mChild.size();
+        mCheck(&c, sizeof c);
+        // For each child include its character and node checksum
+        for(It = mChild.begin(); It != mChild.end(); ++It)
+        {
+            Check_t n = It->second->CalcCheck();
+            c = It->first;
+            mCheck(&c, sizeof c);
+            mCheck(&n, sizeof n);
+        }
+        // Finally include whether this node is an ending in the chaecksum
+        c = mEnd;
+        mCheck(&c, sizeof c);
+    }
+    return mCheck;
+}
+
+/**********************************************************************************
+ * Get number of nodes for this which end/finish a word
+ */
+int Node::CalcEndings()
+{
+    if (mEndings < 0)
+    {
+        // Not already done this node,so calculate the ends
+        int n = 0;
+        NodeMap_t::iterator It;
+        // Number of endings is sum of the endings of the child nodes and plus this node if it ends a word
+        for(It = mChild.begin(); It != mChild.end(); ++It)
+            n += It->second->CalcEndings();
+        n += !!mEnd;
+        mEndings = n;
+    }
+    return mEndings;
+    
+}
+
+/**********************************************************************************
+ * Calculate the height of the trie starting at current node
+ */
+int Node::CalcHeight()
+{
+    if (mHeight < 0)
+    {
+        // Not already done this node,so calculate the height
+        int Hi = 0;
+        NodeMap_t::iterator It;
+        // Get height of all child nodes, remember the highest
+        for(It = mChild.begin(); It != mChild.end(); ++It)
+        {
+            int i = It->second->CalcHeight();
+            if (i >= Hi)
+                Hi = i+1;
+        }
+        mHeight = Hi;
+    }
+    return mHeight;
+}
+
+/**********************************************************************************
+ * Clear indication that node has been counted
+ */
+void Node::ClearCounted()
+{
+    NodeMap_t::iterator It;
+    mCounted = false;
+    for(It = mChild.begin(); It != mChild.end(); ++It)
+        It->second->ClearCounted();
+}
+
+/**********************************************************************************
+ * Count this plus the number of child nodes. As part of the tree node count
+ * scan, make sure not to double count nodes
+ */
+int Node::CountNodes()
+{
+    // Count is 0 if already done
+    if (mCounted)
+        return 0;
+    mCounted = true;
+    NodeMap_t::iterator It;
+    int i = 1; // 1 for this node
+
+    // Add the child nodes
+    for(It = mChild.begin(); It != mChild.end(); ++It)
+        i += It->second->CountNodes();
+    return i;
+}
+
+/**********************************************************************************
+ * Calculate the final node address
+ */
+int Node::CalcAddr(int Start, bool ManyEnds)
+{
+    NodeMap_t::iterator It;
+
+    if (!(mCounted || (ManyEnds && (mEndings < 256))))
+    {
+        mCounted = true;
+        mAddr = Start++;
+    }
+    for(It = mChild.begin(); It != mChild.end(); ++It)
+        Start = It->second->CalcAddr(Start, ManyEnds);
+
+    return Start;
+}
+
+/**********************************************************************************
+ * Add the given character to the current node, return the next lower node
+ */
+NodeSPtr Node::AddChild(char c)
+{
+    NodeMap_t::iterator It;
+    // Find character in map of child nodes
+    It = mChild.find(c);
+    if (It == mChild.end())
+    {
+        // New character, create new child node
+        NodeSPtr a(new Node);
+        a->mParent = this;
+        std::pair<char, NodeSPtr> x(c, a);
+        std::pair<NodeMap_t::iterator, bool> y = mChild.insert(x);
+        It = y.first;
+    }
+    return It->second;
+}
+
+/**********************************************************************************
+ * Find the child node which corresponds to the given character.
+ */
+NodeSPtr Node::FindChild(char Ch)
+{
+    NodeMap_t::iterator It;
+    It = mChild.find(Ch);
+    if (It == mChild.end())
+        return NodeSPtr();
+    return It->second;
+}
+
+/**********************************************************************************
+ * Replace the current child node (old param) with a new child (Replace param),
+ * and update the new child parent.
+ */
+void Node::ChangeChild(NodeSPtr & Replace, NodeSPtr & Old)
+{
+    NodeMap_t::iterator It;
+    for(It = mChild.begin(); It != mChild.end(); ++It)
+    {
+        NodeSPtr p = It->second;
+        if (p == Old)
+        {
+            It->second = Replace;
+            Replace->mParent = this;
+            break;
+        }
+    }
+}
+
+/**********************************************************************************
+ * Find all the characters corresponding to the children of this node.
+ */
+std::string Node::GetChildChars()
+{
+    NodeMap_t::iterator It;
+    std::string Result;
+    for(It = mChild.begin(); It != mChild.end(); ++It)
+    {
+        char c = It->first;
+        Result += c;
+    }
+    return Result;
+}
+
+
+/**********************************************************************************
+ * struct to hold data read from input file (except for the word string)
+ */
+struct Entry
+{
+    Entry() : mRank(0), mDict(0), mOrder(0), mOccurs(0) {}
+    int mRank;
+    int mDict;
+    int mOrder;
+    int mOccurs;
+};
+
+/**********************************************************************************
+ * Struct to hold a string and an int. Also provide the compare operators for std::set class
+ */
+struct StringInt
+{
+    string       s;
+    unsigned int i;
+    StringInt() { i=0; }
+    StringInt(const StringInt & r) : s(r.s), i(r.i) {}
+    StringInt & operator = (const StringInt & r) { i = r.i; s = r.s; return *this; }
+    bool operator < (const StringInt & r)  const { return s < r.s; }
+    bool operator > (const StringInt & r)  const { return s > r.s; }
+    bool operator == (const StringInt & r) const { return s == r.s; }
+    StringInt * Self() const { return const_cast<StringInt *>(this); }
+};
+
+typedef std::map<std::string, Entry> EntryMap_t;
+typedef std::list<NodeSPtr> NodeList_t;
+typedef set<StringInt> StringIntSet_t;
+typedef basic_string<int> StringOfInts;
+typedef vector<unsigned int> UintVect;
+typedef vector<StringInt *> StrIntPtrVect_t;
+typedef vector<StringInt> StringIntVect_t;
+
+// Variables holding 'interesting' information on the data
+unsigned int MaxLength, MinLength, NumChars, NumInWords, NumDuplicate;
+int MaxOccurReduce;
+string MaxOccurStr;
+struct FileInfo
+{
+    FileInfo() : Words(0), BruteIgnore(0), Accented(0), Dups(0), Used(0) { }
+    string Name;
+    int Words;
+    int BruteIgnore;
+    int Accented;
+    int Dups;
+    int Used;
+};
+
+/**********************************************************************************
+ * Read the file of words and add them to Entries.
+ */
+static bool ReadInputFile(EntryMap_t & Entries, const string & FileName, int DictNum, FileInfo &Info)
+{
+    ifstream f(FileName.c_str());
+    if (!f.is_open())
+    {
+        cerr << "Error opening " << FileName << endl;
+        return false;
+    }
+    Info.Name = FileName;
+    
+    // Rank is the position of the work in the dictionary file. Rank==1 is lowest for a word (and
+    // indicates a very popular or bad password).
+    int Rank = 0;
+    string Line;
+    while(getline(f, Line))
+    {
+        // Truncate at first space or tab to leave just the word in case additional info on line
+        string::size_type y = Line.find_first_of("\t ");
+        if (y != string::npos)
+            Line.erase(y);
+
+        y = Line.length();
+        if (!y)
+            continue;
+        
+        ++Info.Words;
+        // Only use words where all chars are ascii (no accents etc.)
+        string::size_type x;
+        double BruteForce = 1.0;
+        for(x = 0; x < y; ++x)
+        {
+            unsigned char c = Line[x];
+            if (c >= 128)
+                break;
+            c = tolower(c);
+            Line[x] = c;
+            BruteForce *= 26.0;
+        }
+        if (x < y)
+        {
+            ++Info.Accented;
+            continue;
+        }
+        
+        // Don't use words where the brute force strength is less than the word's rank
+        if (BruteForce < (Rank+1))
+        {
+            ++Info.BruteIgnore;
+            continue;
+        }
+        // Remember some interesting info
+        if (y > MaxLength)
+            MaxLength = y;
+        if (y < MinLength)
+            MinLength = y;
+        NumChars += y;
+        ++NumInWords;
+        ++Rank;
+
+        EntryMap_t::iterator It = Entries.find(Line);
+        if (It != Entries.end())
+        {
+            // This is a repeat of a previous entry
+            int r = It->second.mRank;
+            if (r > Rank)
+            {
+                // Remember new lower rank
+                It->second.mRank = Rank;
+                It->second.mDict = DictNum;
+                It->second.mOccurs += 1;
+                r -= Rank;
+                if (r > MaxOccurReduce)
+                {
+                    MaxOccurStr = Line;
+                    MaxOccurReduce = r;
+                }
+            }
+            else
+                ++Info.Dups;
+            ++NumDuplicate;
+        }
+        else
+        {
+            // New word
+            Entry e;
+            e.mDict = DictNum;
+            e.mRank = Rank;
+            Entries.insert(std::pair<std::string, Entry>(Line, e));
+            ++Info.Used;
+        }
+    }
+    f.close();
+    return true;
+}
+
+/**********************************************************************************
+ * Use all words previously read from file(s) and add them to a Trie, which starts
+ * at Root. Also update a bool array indicating the chars used in the words.
+ */
+static void ProcessEntries(NodeSPtr Root, EntryMap_t & Entries, bool *InputCharSet)
+{
+    EntryMap_t::iterator It;
+    std::string Text;
+    for(It = Entries.begin(); It != Entries.end(); ++It)
+    {
+        Text = It->first;
+        // Add latest word to tree
+        string::size_type x;
+        NodeSPtr pNode = Root;
+        for(x = 0; x < Text.length(); ++x)
+        {
+            char c = Text[x];
+            pNode = pNode->AddChild(c);
+            // Add char to set of used character codes
+            InputCharSet[c & 0xFF] = true;
+        }
+        pNode->SetEnd();
+    } 
+}
+
+/**********************************************************************************
+ * Add the passed node to the list if it has same height  as value in Hi (= number
+ * of steps to get to a terminal node). If current node has height greater than Hi,
+ * recursivly call with each child node as one of these may be at the required height.
+ */
+static void AddToListAtHeight(NodeList_t & Lst, NodeSPtr Node, int Hi)
+{
+    if (Hi == Node->Height())
+    {
+        Lst.push_back(Node);
+        return;
+    }
+    if (Hi < Node->Height())
+    {
+        NodeMap_t::iterator It;
+        for(It = Node->ChildBegin(); It != Node->ChildEnd(); ++It)
+        {
+            AddToListAtHeight(Lst, It->second, Hi);
+        }
+    }
+}
+
+/**********************************************************************************
+ * Scan the trie and update the original word list with the alphabetical order
+ * (or 'index location') of the words
+ */
+static void ScanTrieForOrder(EntryMap_t & Entries, int & Ord, NodeSPtr Root, const string & Str)
+{
+    if (Root->IsEnd())
+    {
+        // Root is a word ending node, so store its index in the input word store
+        EntryMap_t::iterator Ite;
+        Ite = Entries.find(Str);
+        if (Ite == Entries.end())
+            throw "Trie string not in entries";
+
+        Ite->second.mOrder = ++Ord;
+    }
+    NodeMap_t::iterator It;
+    string Tmp;
+    // For each child, append its character to the current word string and do a recursive
+    // call to update their word indexes.
+    for(It = Root->ChildBegin(); It != Root->ChildEnd(); ++It)
+    {
+        Tmp = Str + It->first;
+        ScanTrieForOrder(Entries, Ord, It->second, Tmp);
+    }
+}
+
+/**********************************************************************************
+ * Reduce the trie by merging tails where possible. Starting at greatest height,
+ * get a list of all nodes with given height, then test for identical nodes. If
+ * found, change the parent of the second identical node to use the first node,
+ * and delete second node and its children. Reduce height by one and repeat
+ * until height is zero.
+ */
+static void ReduceTrie(NodeSPtr Root)
+{
+    int Height;
+    int cnt=0, del=0;
+    Root->CalcCheck();
+    
+    NodeSPtr pNode = Root;
+    for(Height = Root->CalcHeight(); Height >= 0; --Height)
+    {
+        // Get a list of all nodes at given height
+        int x=0;
+        NodeList_t Lst;
+        AddToListAtHeight(Lst, Root, Height);
+        cnt += Lst.size();
+
+        NodeList_t::iterator Ita, Itb;
+        for(Ita = Lst.begin(); Ita != Lst.end(); ++Ita)
+        {
+            // Going to use a CRC to decide if two nodes are identical
+            TrieCheck::Check_t Chka = (*Ita)->CalcCheck();
+            Itb = Ita;
+            for(++Itb; Itb != Lst.end(); )
+            {
+                if (Chka == (*Itb)->CalcCheck())
+                {
+                    // Found two identical nodes (with identical children)
+                    Node * Parentb = (*Itb)->GetParent();
+                    if (Parentb)
+                    {
+                        // Change the 2nd parent to use the current node as child
+                        // Remove the 2nd node from the scanning list to as it will
+                        // get deleted by the sharing (as using std::shared_ptr)
+                        Parentb->ChangeChild(*Ita, *Itb);
+                        ++x;++del;
+                        Itb = Lst.erase(Itb);
+                    }
+                    else
+                    {
+                        cout << " orphan ";
+                        ++Itb;
+                    }
+                }
+                else
+                {
+                    ++Itb;
+                }
+            }
+        }
+    }
+}
+
+/**********************************************************************************
+ * Scan the trie to match with the supplied word. Return the order of the
+ * word, or -1 if it is not in the trie.
+ */
+static int CheckWord(NodeSPtr Root, const string & Str)
+{
+    int i = 1;
+    bool e = false;
+    string::size_type x;
+    NodeSPtr p = Root;
+
+    for(x = 0; x < Str.length(); ++x)
+    {
+        int j;
+        NodeMap_t::iterator It;
+        // Scan children to find one that matches current character
+        char c = Str[x];
+        for(It = p->ChildBegin(); It != p->ChildEnd(); ++It)
+        {
+            if (It->first == c)
+                break;
+            // Add the number of endings at or below child to track the alphabetical index
+            j = It->second->CalcEndings();
+            i += j;
+        }
+        // Fail if no child matches the character
+        if (It == p->ChildEnd())
+            return -1;
+        // Allow for this node being a word ending
+        e = p->IsEnd();
+        if (e)
+            ++i;
+
+        p = It->second;
+    }
+
+    if (p && p->IsEnd())
+    {
+        if (x == Str.length())
+            return i;
+    }
+    return -1;
+}
+
+/**********************************************************************************
+ * Try to find every input word in the reduced trie. The order should also
+ * match, otherwise the reduction has corrupted the trie.
+ */
+static int CheckReduction(StringIntVect_t & Ranks, NodeSPtr Root, EntryMap_t & Entries)
+{
+    int i = 0;
+    int n = 0;
+    EntryMap_t::iterator It;
+    std::string Text;
+    int b;
+    Ranks.resize(Entries.size()+1);
+    for(It = Entries.begin(); (It != Entries.end()) && (i <= 200000); ++It)
+    {
+        Text = It->first;
+        b = CheckWord(Root, Text);
+        if (b < 0)
+        {
+            ++i;
+            cout << It->second.mOrder << ": Missing " << Text.c_str() << endl;
+        }
+        else if (It->second.mOrder != b)
+        {
+            ++i;
+            cout << It->second.mOrder << ": Bad order " << b << " for  " << Text.c_str() << endl;
+        }
+        else
+        {
+            //if (Text == "fred")
+            //    cout << Text.c_str() << "-> " << It->second.mOrder << ", " << It->second.mRank << endl;
+            ++n;
+        }
+        if (b >= int(Ranks.size()))
+            throw " Using  Ranks beyond end";
+        if (b >= 0)
+        {
+            char Tmp[20];
+            Ranks[b].i = It->second.mRank;
+            sprintf(Tmp, "%d: ", n);
+            Ranks[b].s = string(Tmp) + Text;
+        }
+        // Try to find a non-existant word
+        Text.insert(0, "a");
+        Text += '#';
+        b = CheckWord(Root, Text);
+        if (b > 0)
+            throw string("Found non-existant word ") + Text;
+     }
+     if (i > 0)
+         throw "Missing words in reduction check = " + to_string(i);
+    return n;
+}
+
+struct ChkNum
+{
+    int Match;
+    int Err;
+    ChkNum() : Match(0), Err(0) {}
+    ChkNum(const ChkNum &r) : Match(r.Match), Err(r.Err) {}
+    ChkNum & operator = (const ChkNum & r) { Match = r.Match; Err = r.Err; return *this; }
+    ChkNum & operator += (const ChkNum & r) { Match += r.Match; Err += r.Err; return *this; }
+};
+
+/**********************************************************************************
+ * Find all possible words in the trie and make sure they are input words.
+ * Return number of words found. Done as a second trie check.
+ */
+static ChkNum CheckEntries(NodeSPtr Root, string Str, const EntryMap_t & Entries)
+{
+    ChkNum Ret;
+    if (Root->IsEnd())
+    {
+        // This is an end node, find the word in the input words
+        EntryMap_t::const_iterator It = Entries.find(Str);
+        if (It != Entries.end())
+            ++Ret.Match;
+        else
+            ++Ret.Err;
+    }
+    // Add each child character to the passed string and recursively check
+    NodeMap_t::iterator It;
+    for(It = Root->ChildBegin(); It != Root->ChildEnd(); ++It)
+    {
+        string Tmp = Str;
+        Tmp += It->first;
+        Ret += CheckEntries(It->second, Tmp, Entries);
+    }
+    return Ret;
+}
+
+/**********************************************************************************
+ * Convert the passed bool array of used chars into a character string
+ */
+string MakeCharSet(bool *InputCharSet)
+{
+    int i;
+    string s;
+    for(i = 1; i < 256; ++i)
+    {
+        if (InputCharSet[i])
+            s += char(i);
+    }
+    return s;
+}
+
+/**********************************************************************************
+ * Create a set of strings which contain the possible characters matched at
+ * a node when checking a word. 
+ */
+void MakeChildBitMap(StringIntSet_t & StrSet, NodeSPtr Root, int & Loc)
+{
+    // Skip if already done
+    if (Root->IsCounted())
+        return;
+
+    string::size_type x;
+    StringInt In;
+    NodeSPtr p = Root;
+    In.s = Root->GetChildChars();
+    if (StrSet.find(In) == StrSet.end())
+    {
+        // Not already in set of possible child chars for a node, so add it
+        In.i = Loc++; // Address in the final output array
+        StrSet.insert(In);
+    }
+    // Recursively do the child nodes
+    for(x = 0; x < In.s.length(); ++x)
+    {
+        char c = In.s[x];
+        NodeSPtr q = p->FindChild(c);
+        if (q)
+            MakeChildBitMap(StrSet, q, Loc);
+    }
+    Root->SetCounted();
+}
+
+/**********************************************************************************
+ * Create the arrays of data that will be output
+ */
+void CreateArrays(NodeSPtr Root, StringIntSet_t & StrSet, StringOfInts & ChildAddrs, UintVect & NodeData, UintVect & NodeEnds)
+{
+    NodeMap_t::iterator Itc;
+    StringInt Tmp;
+    StringOfInts Chld;
+    
+    // Find children in the child pattern array
+    Tmp.s= Root->GetChildChars();
+    StringIntSet_t::iterator Its = StrSet.find(Tmp);
+
+    // Make a 'string' of pointers to the children
+    for(Itc = Root->ChildBegin(); Itc != Root->ChildEnd(); ++Itc)
+    {
+        int i = Itc->second->GetAddr();
+        Chld += i;
+    }
+    // Find where in pointer array the child pointer string is
+    StringOfInts::size_type x = ChildAddrs.find(Chld);
+    if (x == StringOfInts::npos)
+    {
+        // Not found, add it
+        x = ChildAddrs.length();
+        ChildAddrs += Chld;
+    }
+    // Val will contain the final node data
+    // Bits 12:0    Index of the child char pattern in the final child bitmap array
+    // Bits 29:13   Index of where the child pointers start for this node in the Child map array
+    // Bit 30       Set if this nod is a word ending
+    // Bit 31       Set if the number of word endings for this + child nodes is >= 256
+    unsigned int Val = Its->i;
+    Val |= x << 13;
+    if (Root->IsEnd())
+        Val |= 1<<30;
+    if (Root->GetNumEnds() >= 256)
+        Val |= 1<<31;
+    
+    // Make sure output arrays are big enough
+    if (Root->GetAddr() > NodeData.size())
+    {
+        NodeData.resize(Root->GetAddr()+1, 4000000000);
+        NodeEnds.resize(Root->GetAddr()+1, 4000000000);
+    }
+    // Save the node data and number of word endings for the node
+    NodeData[Root->GetAddr()] = Val;
+    NodeEnds[Root->GetAddr()] = Root->GetNumEnds();
+
+    // Now do the children
+    for(Itc = Root->ChildBegin(); Itc != Root->ChildEnd(); ++Itc)
+    {
+        CreateArrays(Itc->second, StrSet, ChildAddrs, NodeData, NodeEnds);
+    }
+}
+
+/**********************************************************************************
+ * Output the data as a binary file.
+ */
+static int OutputBinary(ostream *Out, const string & ChkFile, const string & CharSet, StringIntSet_t & StrSet, //NodeSPtr & Root, 
+                        StringOfInts & ChildAddrs, UintVect & NodeData, UintVect & NodeEnds, StringIntVect_t & Ranks)
+{
+    int OutputSize;
+    unsigned int FewEndStart = 2000000000;
+    unsigned int i;
+    unsigned int Index;
+    unsigned short u;
+    TrieCheck h;
+
+    for(Index = 0; Index < NodeData.size(); ++Index)
+    {
+        i = NodeData[Index];
+        if ((FewEndStart >= 2000000000) && !(i & (1<<31)))
+        {
+            FewEndStart = Index;
+            break;
+        }
+    }
+    // Header words
+    const unsigned int MAGIC = 'z' + ('x'<< 8) + ('c' << 16) + ('v' << 24);
+    Out->write((char *)&MAGIC, sizeof MAGIC);   // Write magic
+    h(&MAGIC, sizeof MAGIC);
+    OutputSize = sizeof MAGIC;
+    
+    i = NodeData.size();
+    Out->write((char *)&i, sizeof i);   // Write number of nodes
+    h(&i, sizeof i);
+    OutputSize += sizeof i;
+    
+    i = ChildAddrs.size();
+    if (NodeData.size() > numeric_limits<unsigned short>::max())
+        i |= 1<<31;
+    Out->write((char *)&i, sizeof i);   // Write number of child location entries & size of each entry
+    h(&i, sizeof i);
+    OutputSize += sizeof i;
+
+    i = Ranks.size();
+    Out->write((char *)&i, sizeof i);   // Write number of ranks
+    h(&i, sizeof i);
+    OutputSize += sizeof i;
+    
+    i = StrSet.size();
+    Out->write((char *)&i, sizeof i);   // Write size of of child bitmap data
+    h(&i, sizeof i);
+    OutputSize += sizeof i;
+
+    unsigned int BytePerEntry = (CharSet.length() + 7) / 8;
+    Out->write((char *)&BytePerEntry, sizeof BytePerEntry);   // Write size of each child bitmap
+    h(&BytePerEntry, sizeof BytePerEntry);
+    OutputSize += sizeof BytePerEntry;
+
+    Out->write((char *)&FewEndStart, sizeof FewEndStart);   // Write number of large end counts
+    h(&FewEndStart, sizeof FewEndStart);
+    OutputSize += sizeof FewEndStart;
+
+    i = NodeData.size();
+    Out->write((char *)&i, sizeof i);   // Write number of end counts
+    h(&i, sizeof i);
+    OutputSize += sizeof i;
+
+    i = CharSet.length();
+    Out->write((char *)&i, sizeof i);   // Write size of character set
+    h(&i, sizeof i);
+    OutputSize += sizeof i;
+
+    // Output array of node data
+    for(Index = 0; Index < NodeData.size(); ++Index)
+    {
+        i = NodeData[Index];
+        Out->write((char *)&i, sizeof i);
+        h(&i, sizeof i);
+    }    
+    OutputSize += Index * sizeof i;
+
+    // Output array of node pointers
+    if (NodeData.size() > numeric_limits<unsigned short>::max())
+    {
+        for(Index = 0; Index < ChildAddrs.size(); ++Index)
+        {
+            i = ChildAddrs[Index];
+            Out->write((char *)&i, sizeof i);
+            h(&i, sizeof i);
+        }
+        OutputSize += Index * sizeof i;
+    }
+    else
+    {
+        for(Index = 0; Index < ChildAddrs.size(); ++Index)
+        {
+            u = ChildAddrs[Index];
+            Out->write((char *)&u, sizeof u);
+            h(&u, sizeof u);
+        }
+        OutputSize += Index * sizeof u;
+    }
+    // Output ranks
+    for(Index = 0; Index < Ranks.size(); ++Index)
+    {
+        u = Ranks[Index].i;
+        Out->write((char *)&u, sizeof u);
+        h(&u, sizeof u);
+    }    
+    OutputSize += Index * sizeof u;
+
+    StringIntSet_t::iterator Its;
+    string Str;
+    unsigned char Buf[8];
+    
+    // Get the items from StrSet ordered by the index
+    StrIntPtrVect_t SetPtrs;
+    SetPtrs.resize(StrSet.size());
+    for(Its = StrSet.begin(); Its != StrSet.end(); ++Its)
+    {
+        StringInt *p = Its->Self();
+        if (p->i >= StrSet.size())
+        {
+            cout << "p->i=" << p->i << "  " << p->s.c_str() << endl;
+            throw "Bad index";
+        }
+        SetPtrs[p->i] = p;
+    }
+    // Output child bitmap
+    for(Index = 0; Index < SetPtrs.size(); ++Index)
+    {
+        string::size_type z, y;
+        StringInt *p;
+        memset(Buf, 0, sizeof Buf);
+        p = SetPtrs[Index];
+        Str = p->s;
+        for(z = 0; z < Str.length(); ++z)
+        {
+            y = CharSet.find(Str[z]);
+            if (y != string::npos)
+            {
+                Buf[y/8] |= 1 << (y & 7);
+            }
+        }
+        Out->write((char *)Buf, BytePerEntry);
+        h(Buf, BytePerEntry);
+     }
+    OutputSize += Index * BytePerEntry;
+    
+    unsigned char c;
+    for(Index = 0; Index < FewEndStart; ++Index)
+    {
+        i = NodeEnds[Index] >> 8;
+        if (i >= 256)
+            c = 0;
+        else
+            c = i;
+        Out->write((char *)&c, 1);
+        h(&c, 1);
+    }
+    OutputSize += Index * sizeof c;
+    for(Index = 0; Index < NodeEnds.size(); ++Index)
+    {
+        c = NodeEnds[Index];;
+        Out->write((char *)&c, 1);
+        h(&c, 1);
+    }
+    OutputSize += Index * sizeof c;
+
+    Out->write(CharSet.c_str(), CharSet.length());
+    h(CharSet.c_str(), CharSet.length());
+    OutputSize += CharSet.length();
+    
+    if (!ChkFile.empty())
+    {
+        // Write the checksum file
+        TrieCheck::Check_t x = h.Result();
+        ofstream f(ChkFile);
+        f << "static const unsigned char WordCheck[" << sizeof x << "] =\n{\n    ";
+        unsigned char *c = reinterpret_cast<unsigned char *>(&x);
+        for(Index = 0; Index < sizeof x; ++Index, ++c)
+        {
+            if (Index)
+                f << ',';
+            f << int(*c);
+        }
+        f << "\n};\n";
+        f << "#define WORD_FILE_SIZE " << OutputSize << endl;
+        f << "#define ROOT_NODE_LOC  0" << endl;
+        f.close();
+    }
+    return OutputSize;
+}
+
+int OutputTester(ostream *Out, bool /*Cmnts*/, StringIntVect_t & Ranks)
+{
+    unsigned int Index;
+    string Pwd;
+    for(Index = 01; Index < Ranks.size(); ++Index)
+    {
+        unsigned int v = Ranks[Index].i;
+        Pwd = Ranks[Index].s;
+        string::size_type x = Pwd.find(':');
+        if (x != string::npos)
+            Pwd.erase(0, x+1);
+        
+        *Out << Pwd.c_str() << "  ";
+        for(x = Pwd.length(); x < 16; ++x)
+            *Out << ' ';
+        *Out << log(v*1.0) / log(2.0) << '\n';
+    }
+    return Index;
+}
+/**********************************************************************************
+ * Output the data as C source.
+ */
+int OutputCode(ostream *Out, bool Cmnts, const string & CharSet, StringIntSet_t & StrSet, NodeSPtr & Root,
+               StringOfInts & ChildAddrs, UintVect & NodeData, UintVect & NodeEnds, StringIntVect_t & Ranks)
+{
+    unsigned int Index;
+    int OutputSize;
+
+    if (Cmnts)
+        *Out << "#define ND(e,c,b) (e<<30)|(c<<13)|b\n";
+    
+    // Output array of node data
+    *Out << "#define ROOT_NODE_LOC 0" << endl;
+    *Out << "static const unsigned int DictNodes[" << NodeData.size() << "] =\n{";
+    OutputSize = NodeData.size() * sizeof(unsigned int);
+    int x = 99;
+    unsigned int FewEndStart = 2000000000;
+    for(Index = 0; Index < NodeData.size(); ++Index)
+    {
+        unsigned int v;
+        if (++x >= 8)
+        {
+            *Out << "\n    ";
+            x=0;
+        }
+        v = NodeData[Index];
+        if (Cmnts)
+        {
+            unsigned int i;
+            i = (v >> 30) & 3;
+            *Out << "ND(" << i << ',';
+            i= (v >> 13) & ((1<<17)-1);
+            *Out << i << ',';
+            if (i < 10000) *Out << ' ';
+            if (i < 1000)  *Out << ' ';
+            if (i < 100)   *Out << ' ';
+            if (i < 10)    *Out << ' ';
+            i = v & ((1<<13)-1);
+            *Out << i << ")";
+            if (Index < (NodeData.size()-1))
+            {
+                *Out << ',';
+                if (i < 1000)  *Out << ' ';
+                if (i < 100)   *Out << ' ';
+                if (i < 10)    *Out << ' ';
+            }
+        }
+        else
+        {    
+            *Out << v;
+            if (Index < (NodeData.size()-1))
+            {
+                *Out << ',';
+                if (v < 1000000000) *Out << ' ';
+                if (v < 100000000) *Out << ' ';
+                if (v < 10000000) *Out << ' ';
+                if (v < 1000000) *Out << ' ';
+                if (v < 100000) *Out << ' ';
+                if (v < 10000) *Out << ' ';
+                if (v < 1000)  *Out << ' ';
+                if (v < 100)   *Out << ' ';
+                if (v < 10)    *Out << ' ';
+            }
+        }
+        if ((FewEndStart >= 2000000000) && !(v & (1<<31)))
+            FewEndStart = Index;
+    }
+    *Out << "\n};\n";
+    
+    // Output array of node pointers
+    *Out << "static const unsigned ";
+    if (NodeData.size() > numeric_limits<unsigned short>::max())
+    {
+        *Out << "int";
+        x = sizeof(unsigned int); 
+    }
+    else
+    {
+        *Out << "short";
+        x = sizeof(unsigned short);
+    }
+    *Out << " ChildLocs[" << ChildAddrs.size() << "] =\n{";
+    OutputSize += x * ChildAddrs.size();
+    x = 99;
+    for(Index = 0; Index < ChildAddrs.size(); ++Index)
+    {
+        int v;
+        if (++x > 19)
+        {
+            *Out << "\n    ";
+            x=0;
+        }
+        v = ChildAddrs[Index];
+        *Out << v;
+        if (Index < (ChildAddrs.size()-1))
+        {
+            *Out << ',';
+            if (v < 10000) *Out << ' ';
+            if (v < 1000)  *Out << ' ';
+            if (v < 100)   *Out << ' ';
+            if (v < 10)    *Out << ' ';
+        }
+        
+    }
+    *Out << "\n};\n";
+
+    // Output the rank of the words
+    *Out << "static const unsigned short Ranks[" << Ranks.size() << "] =\n{";
+    OutputSize += Ranks.size() * sizeof(unsigned short);
+    x=99;
+    if (Cmnts)
+    {
+        *Out << "\n";
+        for(Index = 0; Index < Ranks.size(); ++Index)
+        {
+            unsigned int v;
+            *Out << "    ";
+            v = Ranks[Index].i;
+            *Out << v;
+            if (Index < (Ranks.size()-1))
+            {
+                *Out << ',';
+                if (v < 10000) *Out << ' ';
+                if (v < 1000)  *Out << ' ';
+                if (v < 100)   *Out << ' ';
+                if (v < 10)    *Out << ' ';
+            }
+            *Out << " // " << Ranks[Index].s.c_str() << '\n';
+        }
+    }
+    else
+    {
+        for(Index = 0; Index < Ranks.size(); ++Index)
+        {
+            unsigned int v;
+            if (++x > 19)
+            {
+                *Out << "\n    ";
+                x=0;
+            }
+            v = Ranks[Index].i;
+            *Out << v;
+            if (Index < (Ranks.size()-1))
+            {
+                *Out << ',';
+                if (v < 10000) *Out << ' ';
+                if (v < 1000)  *Out << ' ';
+                if (v < 100)   *Out << ' ';
+                if (v < 10)    *Out << ' ';
+            }
+        }
+    }
+    *Out << "\n};\n";
+
+    unsigned int BytePerEntry = (CharSet.length() + 7) / 8;
+    *Out << "#define SizeChildMapEntry " << BytePerEntry << '\n';
+    *Out << "static const unsigned char ChildMap[" << StrSet.size() << '*' << BytePerEntry << "] =\n{";
+    OutputSize += StrSet.size() * BytePerEntry * sizeof(unsigned char);
+        
+    StringIntSet_t::iterator Its;
+    string Str;
+    unsigned char Buf[8];
+    
+    // Get the items from StrSet ordered by the index
+    StrIntPtrVect_t SetPtrs;
+    SetPtrs.resize(StrSet.size());
+    for(Its = StrSet.begin(); Its != StrSet.end(); ++Its)
+    {
+        StringInt *p = Its->Self();
+        if (p->i >= StrSet.size())
+        {
+            cout << "p->i=" << p->i << "  " << p->s.c_str() << endl;
+            throw "Bad index";
+        }
+        SetPtrs[p->i] = p;
+    }
+
+    x=99;
+    for(Index = 0; Index < SetPtrs.size(); ++Index)
+    {
+        string::size_type z, y;
+        StringInt *p;
+        memset(Buf, 0, sizeof Buf);
+        if (x > 4)
+        {
+            *Out << "\n    ";
+            x = 0;
+        }
+        p = SetPtrs[Index];
+        Str = p->s;
+        for(z = 0; z < Str.length(); ++z)
+        {
+            y = CharSet.find(Str[z]);
+            if (y != string::npos)
+            {
+                Buf[y/8] |= 1 << (y & 7);
+            }
+        }
+        for(z = 0; z < BytePerEntry; ++z)
+        {
+            y = Buf[z] & 0xFF;
+            *Out << y;
+            if (z < (BytePerEntry-1))
+                *Out << ',';
+            else
+            {
+                if (Index < (SetPtrs.size() - 1))
+                    *Out << ", ";
+            }
+            if (y < 100)
+                *Out << ' ';
+            if (y < 10)
+                *Out << ' ';
+        }
+        ++x;
+        if (Cmnts)
+        {
+            *Out << " // " << p->i << ": " << Str;
+            x = 99;
+        }
+
+        
+    }
+    *Out << "\n};" << endl;
+
+    // Output the top 8 bits of the node word endings count. Since node with >255 endings have
+    // been placed at the begining, and ther are not too many of them the array is fairly small.
+    *Out << "static const unsigned char EndCountLge[" << FewEndStart << "] =\n{";
+    OutputSize += FewEndStart * sizeof(unsigned char);
+    x=99;
+    for(Index = 0; Index < FewEndStart; ++Index)
+    {
+        unsigned int v;
+        if (++x > 19)
+        {
+            *Out << "\n    ";
+            x=0;
+        }
+        v = NodeEnds[Index] >> 8;
+        if (v >= 256)
+            v = 0;
+        *Out << v;
+        if (Index < (FewEndStart-1))
+        {
+            *Out << ',';
+            if (v < 100)   *Out << ' ';
+            if (v < 10)    *Out << ' ';
+        }      
+    }
+    *Out << "\n};\n";
+    
+    // Output all the word ending counts. For the first few nodes this is just the lower 8 bits of
+    // the value. For the rest each entry contains the whole count. The split between lower and
+    // upper halves of the value for the first few nodes allows bytes arrays to be used, so saving
+    // memory.
+    *Out << "static const unsigned char EndCountSml[" << NodeEnds.size() << "] =\n{";
+    OutputSize += NodeEnds.size() * sizeof(unsigned char);
+    x=99;
+    for(Index = 0; Index < NodeEnds.size(); ++Index)
+    {
+        unsigned int v;
+        if (++x > 19)
+        {
+            *Out << "\n    ";
+            x=0;
+        }
+        v = NodeEnds[Index] & 255;
+        *Out << v;
+        if (Index < (NodeEnds.size()-1))
+        {
+            *Out << ',';
+            if (v < 100)   *Out << ' ';
+            if (v < 10)    *Out << ' ';
+        }      
+    }
+    *Out << "\n};\n";
+    
+    // Finally output the used characters.
+    *Out << "static const char CharSet[" << CharSet.length()+1 << "] = \"";
+    OutputSize += CharSet.length() * sizeof(char);
+    for(Index = 0; Index < CharSet.length(); ++Index)
+    {
+        char c = CharSet[Index];
+        if ((c == '\\') || (c == '"'))
+            *Out << '\\';
+        *Out << c;
+    }
+    *Out << "\";" << endl;
+    *Out << "#define ROOT_NODE_LOC " << Root->GetAddr() << "\n";
+    return OutputSize + sizeof(unsigned int);
+}
+enum { OUT_C_CODE, OUT_BINARY, OUT_TESTER };
+/**********************************************************************************
+ */
+int main(int argc, char *argv[])
+{
+    int i;
+    int OutType = OUT_C_CODE;
+    bool Verbose = false;
+    bool Comments = false;
+    string FileName, HashFile;
+    char *OutFile = 0;
+    EntryMap_t Entries;
+    FileInfo InInfo[10];
+    int NumFiles = 0;
+    MinLength = 999;
+    
+    try
+    {
+        for(i = 1; i < argc; ++i)
+        {
+            FileName = argv[i];
+            if (FileName == "-b")
+            {
+                // Output a binary file to stdout or file
+                OutType = OUT_BINARY;
+                continue;
+            }
+            if (FileName == "-t")
+            {
+                // Output a tester file to stdout or file
+                OutType = OUT_TESTER;
+                continue;
+            }
+            if (FileName == "-c")
+            {
+                // Add comments to the output (if text)
+                Comments = true;
+                continue;
+            }
+            if (FileName == "-o")
+            {
+                // Give output file
+                if (++i < argc)
+                    OutFile = argv[i];
+                continue;
+            }
+            if (FileName == "-h")
+            {
+                // Give crc header output file
+                if (++i < argc)
+                    HashFile = argv[i];
+                continue;
+            }
+            if (FileName == "-v")
+            {
+                Verbose = true;
+                continue;
+            }
+            if (FileName[0] == '-')
+            {
+                cerr << "Usage: " << argv[0] << " [ -c ] [ -b | -t ] [ -o Ofile ] [ -h Hfile ] Files...\n"
+                        "Where:\n"
+                        "     -b        Generate a binary output file\n"
+                        "     -t        Generate a test file for testing zxcvbn\n"
+                        "     -c        Add comments to output file if C code mode\n"
+                        "     -v        Additional information output\n"
+                        "     -h Hfile  Write file checksum to file Hfile as C code (for -b mode)\n"
+                        "     -o Ofile  Write output to file Ofile\n"
+                        "     Files     The dictionary input files to read\n"
+                        "  If the -o option is not used, output is written to stdout\n"
+                        "  if the -b option is not used, output is in the form of C source code\n"
+                        << endl;
+                return 1;
+            }
+            ReadInputFile(Entries, FileName, i, InInfo[NumFiles]);
+            if (NumFiles < int(sizeof InInfo / sizeof InInfo[0] - 1))
+                ++NumFiles;
+        }
+        if (Verbose)
+        {
+            if (!OutFile && (OutType == OUT_C_CODE))
+                cout << "/*\n";
+            for(i = 0; i < NumFiles; ++i)
+            {
+                FileInfo *Fi = InInfo + i;
+                cout << "Read input file " << Fi->Name << endl;
+                cout << "   Input words  " << Fi->Words << endl;
+                cout << "   Used words   " << Fi->Used << endl;
+                cout << /*"        Unused  " << Fi->BruteIgnore <<
+                        " Bruteforce compare, " << Fi->Accented <<
+                        " Accented char, " << Fi->Dups << " Duplicates" << */ endl;
+            }
+        }
+        bool InputCharSet[256];
+        NodeSPtr Root(new Node);
+        // Initially charset of used chracters is empty
+        memset(InputCharSet, 0, sizeof InputCharSet);
+        
+        // Add words to the trie with root in Root
+        ProcessEntries(Root, Entries, InputCharSet);
+        
+        // Get some interesting info
+        int NumEnds = Root->CalcEndings();
+        int Hi = Root->CalcHeight();
+        int NumNodes = Root->NodeCount();
+        if (Verbose)
+        {
+            cout << "Max word length = " << MaxLength << endl;
+            cout << "Min word length = " << MinLength << endl;
+            cout << "Num input chars = " << NumChars << endl;
+            cout << "Num input words = " << NumInWords << endl;
+            cout << "Duplicate words = " << NumDuplicate;
+            cout << "  (Rank most reduced for \"" << MaxOccurStr.c_str() << "\")"<< endl;
+            cout << "Number of Ends  = " << NumEnds << endl;
+            cout << "Number of Nodes = " << NumNodes << endl;
+            cout << "Trie height = " << Hi << endl;
+        }
+        // Store the alphabetical ordering of the input words
+        i = 0;
+        ScanTrieForOrder(Entries, i, Root, string());
+        if (Verbose)
+            cout << "Trie Order = " << i << endl;
+        int InputOrder = i;
+        // Reduce the Trie
+        ReduceTrie(Root);
+
+        // Output some interesting information
+        NumNodes = Root->NodeCount();
+        int ReduceEnds = Root->CalcEndings();
+        if (Verbose)
+        {
+            cout << "After reduce:\n";
+            cout << "Number of Ends  = " << ReduceEnds << endl;
+            cout << "Number of Nodes = " << NumNodes << endl;
+        }
+        // Check reduction was OK
+        StringIntVect_t Ranks;
+        int CheckEnds = CheckReduction(Ranks, Root, Entries);
+        if (Verbose)
+            cout << "Number of Words = " << CheckEnds << endl;
+       
+        ChkNum Tst = CheckEntries(Root, string(), Entries);
+        if (Verbose)
+        {
+            cout << "2nd check - Number of valid words = " << Tst.Match << endl;
+            cout << "          Number of invalid words = " << Tst.Err << endl;
+        }
+        
+        // Give up if there was an error
+        if (Tst.Err)
+            throw "Checks show invalid words after reduction";
+        if ((Tst.Match != InputOrder) || (ReduceEnds != InputOrder))
+            throw "Word count changed after reduce";
+        
+        // Output more info
+        StringIntSet_t ChildBits;
+        string CharSet = MakeCharSet(InputCharSet);
+        if (Verbose)
+            cout << "Used characters (" << CharSet.length() << "): " << CharSet.c_str() << endl;
+
+        // Make a set of all unique child character patterns for the nodes
+        i=0;
+        Root->ClearCounted();
+        MakeChildBitMap(ChildBits, Root, i);
+        if (Verbose)
+            cout << "Number of child bitmaps = " << ChildBits.size() << endl;
+        
+        // Get final node address
+        Root->CalcAddress();
+
+        UintVect NodeData;
+        UintVect NodeEnds;
+        StringOfInts ChildAddrs;
+
+        // Resize to save library adjusting allocation during data creation
+        NodeData.resize(NumNodes, 4000000000);
+        NodeEnds.resize(NumNodes, 4000000000);
+        CreateArrays(Root, ChildBits, ChildAddrs, NodeData, NodeEnds);
+        if (Verbose)
+        {
+            cout << "Node data array size " << NodeData.size() << endl;
+            cout << "Child pointer array size " << ChildAddrs.size() << endl;
+        }
+        shared_ptr<ofstream> fout;
+        ostream *Out = &cout;
+        if (OutFile)
+        {
+            fout = shared_ptr<ofstream>(new ofstream);
+            if (OutType == OUT_BINARY)
+                fout->open(OutFile, ios_base::trunc | ios_base::binary);
+            else
+                fout->open(OutFile, ios_base::trunc);
+            Out = fout.get();
+        }
+        if (!OutFile && (OutType == OUT_C_CODE))
+            cout << "*/\n";
+ 
+        if (OutType == OUT_BINARY)
+            i = OutputBinary(Out, HashFile, CharSet, ChildBits, ChildAddrs, NodeData, NodeEnds, Ranks);
+        else if (OutType == OUT_TESTER)
+            i = OutputTester(Out, Comments, Ranks);
+        else
+            i = OutputCode(Out, Comments, CharSet, ChildBits, Root, ChildAddrs, NodeData, NodeEnds, Ranks);
+
+        if (fout)
+        {
+            fout->close();
+        }
+    }
+    catch(const char *m)
+    {
+        cerr << m << endl;
+        return 1;
+    }
+    catch(string m)
+    {
+        cerr << m.c_str() << endl;
+        return 1;
+    }
+    catch(...)
+    {
+        cerr << "Unhandled exception" << endl;
+        return 1;
+    }
+    return 0;
+}
+
+/**********************************************************************************/
author	tony <tsyrogit@users.noreply.github.com>	2015-01-03 00:16:04 +0000
committer	tony <tsyrogit@users.noreply.github.com>	2015-01-03 00:16:04 +0000
commit	38a981df53d53cc96ef47e15d62b7c9f0151315f (patch)
tree	7059c14f2e43d7c5d3cbb48ce5ec4004c6a752ca /dict-generate.cpp
parent	572ce633e134e53bd6a72220613feb82a5ef0a16 (diff)
download	zxcvbn-c-38a981df53d53cc96ef47e15d62b7c9f0151315f.tar.gz