aboutsummaryrefslogtreecommitdiffhomepage
path: root/dict-generate.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'dict-generate.cpp')
-rw-r--r--dict-generate.cpp85
1 files changed, 58 insertions, 27 deletions
diff --git a/dict-generate.cpp b/dict-generate.cpp
index 410182d..711124d 100644
--- a/dict-generate.cpp
+++ b/dict-generate.cpp
@@ -1,32 +1,24 @@
/**********************************************************************************
* Program to generate the dictionary for the C implementation of the zxcvbn password estimator.
- * Copyright (c) 2015, Tony Evans
- * All rights reserved.
+ * Copyright (c) 2015-2017 Tony Evans
*
- * Redistribution and use in source and binary forms, with or without modification, are
- * permitted provided that the following conditions are met:
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
*
- * 1. Redistributions of source code must retain the above copyright notice, this list
- * of conditions and the following disclaimer.
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
*
- * 2. Redistributions in binary form must reproduce the above copyright notice, this
- * list of conditions and the following disclaimer in the documentation and/or other
- * materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors may be
- * used to endorse or promote products derived from this software without specific
- * prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
- * SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
*
**********************************************************************************/
@@ -95,6 +87,7 @@ public:
unsigned int GetAddr() const { return mAddr; }
NodeMap_t::iterator ChildBegin() { return mChild.begin(); }
NodeMap_t::iterator ChildEnd() { return mChild.end(); }
+ unsigned int GetNumChild() { return mChild.size(); }
int GetNumEnds() const { return mEndings; }
NodeSPtr FindChild(char);
std::string GetChildChars();
@@ -402,6 +395,9 @@ typedef vector<StringInt> StringIntVect_t;
// Variables holding 'interesting' information on the data
unsigned int MaxLength, MinLength, NumChars, NumInWords, NumDuplicate;
+static string PassWithMaxChilds, MaxChildChars;
+static unsigned int MaxNumChilds, MaxChildsPosn;
+
struct FileInfo
{
FileInfo() : Words(0), BruteIgnore(0), Accented(0), Dups(0), Used(0), Rank(0) { }
@@ -686,6 +682,16 @@ static int CheckWord(NodeSPtr Root, const string & Str)
if (e)
++i;
+ if (p->GetNumChild() > MaxNumChilds)
+ {
+ NodeMap_t::iterator Itc;
+ MaxNumChilds = p->GetNumChild();
+ MaxChildsPosn = x;
+ PassWithMaxChilds = Str;
+ MaxChildChars.clear();
+ for(Itc = p->ChildBegin(); Itc != p->ChildEnd(); ++Itc)
+ MaxChildChars += Itc->first;
+ }
p = It->second;
}
@@ -1046,7 +1052,7 @@ static int OutputBinary(ostream *Out, const string & ChkFile, const string & Cha
Out->write((char *)WordEnds, NumWordEnd);
h(WordEnds, NumWordEnd);
OutputSize += NumWordEnd;
- delete WordEnds;
+ delete [] WordEnds;
StringIntSet_t::iterator Its;
string Str;
@@ -1063,8 +1069,10 @@ static int OutputBinary(ostream *Out, const string & ChkFile, const string & Cha
SetPtrs[p->i] = p;
}
// Output child bitmap
+ unsigned int CharSetLen = 0;
for(Index = 0; Index < SetPtrs.size(); ++Index)
{
+ unsigned int i, j;
string::size_type z, y;
StringInt *p;
memset(Buf, 0, sizeof Buf);
@@ -1078,6 +1086,15 @@ static int OutputBinary(ostream *Out, const string & ChkFile, const string & Cha
Buf[y/8] |= 1 << (y & 7);
}
}
+ // Find the max number of bits set, which indicates the max number of chars used at a node
+ for(i = j = 0; i < 8 * sizeof Buf; ++i)
+ {
+ if (Buf[i/8] & (1 << (i & 7)))
+ ++j;
+ }
+ if (j > CharSetLen)
+ CharSetLen = j;
+
Out->write((char *)Buf, BytePerEntry);
h(Buf, BytePerEntry);
}
@@ -1127,7 +1144,8 @@ static int OutputBinary(ostream *Out, const string & ChkFile, const string & Cha
"#define BITS_CHILD_PATT_INDEX " << BITS_CHILD_PATT_INDEX << "\n"
"#define BITS_CHILD_MAP_INDEX " << BITS_CHILD_MAP_INDEX << "\n"
"#define SHIFT_CHILD_MAP_INDEX BITS_CHILD_PATT_INDEX\n"
- "#define SHIFT_WORD_ENDING_BIT (SHIFT_CHILD_MAP_INDEX + BITS_CHILD_MAP_INDEX)" << endl;
+ "#define SHIFT_WORD_ENDING_BIT (SHIFT_CHILD_MAP_INDEX + BITS_CHILD_MAP_INDEX)\n"
+ "#define CHARSET_SIZE " << (CharSetLen + 1) << endl;
f.close();
}
return OutputSize;
@@ -1403,9 +1421,12 @@ int OutputCode(ostream *Out, bool Cmnts, const string & CharSet, StringIntSet_t
}
SetPtrs[p->i] = p;
}
+ unsigned int CharSetLen = 0;
x = 999;
+ Len = 0;
for(Index = 0; Index < SetPtrs.size(); ++Index)
{
+ unsigned int i, j;
string::size_type z, y;
StringInt *p;
memset(Buf, 0, sizeof Buf);
@@ -1424,6 +1445,14 @@ int OutputCode(ostream *Out, bool Cmnts, const string & CharSet, StringIntSet_t
Buf[y/8] |= 1 << (y & 7);
}
}
+ // Find the max number of bits set, which indicates the max number of chars used at a node
+ for(i = j = 0; i < 8 * sizeof Buf; ++i)
+ {
+ if (Buf[i/8] & (1 << (i & 7)))
+ ++j;
+ }
+ if (j > CharSetLen)
+ CharSetLen = j;
for(z = 0; z < BytePerEntry; ++z)
{
y = Buf[z] & 0xFF;
@@ -1447,7 +1476,7 @@ int OutputCode(ostream *Out, bool Cmnts, const string & CharSet, StringIntSet_t
x = 999;
}
}
- *Out << "\n};" << endl;
+ *Out << "\n};\n#define CHARSET_SIZE " << (CharSetLen+1) << endl;
// Output the top 8 bits of the node word endings count. Since nodes with >255 endings have
// been placed at the beginning, and there are not too many of them, the array is fairly small.
@@ -1714,6 +1743,8 @@ int main(int argc, char *argv[])
{
cout << "Node data array size " << NodeData.size() << endl;
cout << "Child pointer array size " << ChildAddrs.size() << endl;
+ cout << "Max node childs " << MaxNumChilds << " (chars " << MaxChildChars << " ) at character index "
+ << MaxChildsPosn << " using password " << PassWithMaxChilds.c_str() << endl;
}
shared_ptr<ofstream> fout;
ostream *Out = &cout;