From e1eb658db2f04b9235a10d210a1ed551511bff78 Mon Sep 17 00:00:00 2001
From: tony
Date: Sat, 4 Mar 2017 16:49:38 +0000
Subject: Calculate the required size for PossChars based on the dictionary used.

The fix for this by hannob in commit ad2ef761 works for the current
dictionary, but would fail if a dictionary is used with 49 or more
possible characters for one of its nodes.
---
 dict-generate.cpp | 43 +++++++++++++++++++++++++++++++++++++++++--
 test.c            | 25 +++++++++++++++++++------
 zxcvbn.c          |  3 +--
 3 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/dict-generate.cpp b/dict-generate.cpp
index 8dd48d4..bf899a3 100644
--- a/dict-generate.cpp
+++ b/dict-generate.cpp
@@ -95,6 +95,7 @@ public:
   unsigned int GetAddr() const { return mAddr; }
   NodeMap_t::iterator ChildBegin() { return mChild.begin(); }
   NodeMap_t::iterator ChildEnd() { return mChild.end(); }
+  unsigned int GetNumChild() { return mChild.size(); }
   int GetNumEnds() const { return mEndings; }
   NodeSPtr FindChild(char);
   std::string GetChildChars();
@@ -402,6 +403,9 @@ typedef vector StringIntVect_t;
 // Variables holding 'interesting' information on the data
 unsigned int MaxLength, MinLength, NumChars, NumInWords, NumDuplicate;
 
+static string PassWithMaxChilds, MaxChildChars;
+static unsigned int MaxNumChilds, MaxChildsPosn;
+
 struct FileInfo
 {
   FileInfo() : Words(0), BruteIgnore(0), Accented(0), Dups(0), Used(0), Rank(0) { }
@@ -686,6 +690,16 @@ static int CheckWord(NodeSPtr Root, const string & Str)
     if (e)
       ++i;
 
+    if (p->GetNumChild() > MaxNumChilds)
+    {
+      NodeMap_t::iterator Itc;
+      MaxNumChilds = p->GetNumChild();
+      MaxChildsPosn = x;
+      PassWithMaxChilds = Str;
+      MaxChildChars.clear();
+      for(Itc = p->ChildBegin(); Itc != p->ChildEnd(); ++Itc)
+        MaxChildChars += Itc->first;
+    }
     p = It->second;
   }
 
@@ -1063,8 +1077,10 @@ static int OutputBinary(ostream *Out, const string & ChkFile, const string & Cha
     SetPtrs[p->i] = p;
   }
   // Output child bitmap
+  unsigned int CharSetLen = 0;
   for(Index = 0; Index < SetPtrs.size(); ++Index)
   {
+    unsigned int i, j;
     string::size_type z, y;
     StringInt *p;
     memset(Buf, 0, sizeof Buf);
@@ -1078,6 +1094,15 @@
         Buf[y/8] |= 1 << (y & 7);
       }
     }
+    // Find max bits set which indicates max number chars ued at a node
+    for(i = j = 0; i < 8 * sizeof Buf; ++i)
+    {
+      if (Buf[i/8] & (1 << (i & 7)))
+        ++j;
+    }
+    if (j > CharSetLen)
+      CharSetLen = j;
+
     Out->write((char *)Buf, BytePerEntry);
     h(Buf, BytePerEntry);
   }
@@ -1127,7 +1152,8 @@
          "#define BITS_CHILD_PATT_INDEX " << BITS_CHILD_PATT_INDEX << "\n"
          "#define BITS_CHILD_MAP_INDEX " << BITS_CHILD_MAP_INDEX << "\n"
          "#define SHIFT_CHILD_MAP_INDEX BITS_CHILD_PATT_INDEX\n"
-         "#define SHIFT_WORD_ENDING_BIT (SHIFT_CHILD_MAP_INDEX + BITS_CHILD_MAP_INDEX)" << endl;
+         "#define SHIFT_WORD_ENDING_BIT (SHIFT_CHILD_MAP_INDEX + BITS_CHILD_MAP_INDEX)\n"
+         "#define CHARSET_SIZE " << (CharSetLen + 1) << endl;
     f.close();
   }
   return OutputSize;
@@ -1403,9 +1429,12 @@ int OutputCode(ostream *Out, bool Cmnts, const string & CharSet, StringIntSet_t
     }
     SetPtrs[p->i] = p;
   }
+  unsigned int CharSetLen = 0;
   x = 999;
+  Len = 0;
   for(Index = 0; Index < SetPtrs.size(); ++Index)
   {
+    unsigned int i, j;
     string::size_type z, y;
     StringInt *p;
     memset(Buf, 0, sizeof Buf);
@@ -1424,6 +1453,14 @@
         Buf[y/8] |= 1 << (y & 7);
       }
     }
+    // Find max bits set which indicates max number chars ued at a node
+    for(i = j = 0; i < 8 * sizeof Buf; ++i)
+    {
+      if (Buf[i/8] & (1 << (i & 7)))
+        ++j;
+    }
+    if (j > CharSetLen)
+      CharSetLen = j;
     for(z = 0; z < BytePerEntry; ++z)
     {
       y = Buf[z] & 0xFF;
@@ -1447,7 +1484,7 @@
       x = 999;
     }
   }
-  *Out << "\n};" << endl;
+  *Out << "\n};\n#define CHARSET_SIZE " << (CharSetLen+1) << endl;
 
   // Output the top 8 bits of the node word endings count. Since node with >255 endings have
   // been placed at the begining, and ther are not too many of them the array is fairly small.
@@ -1714,6 +1751,8 @@ int main(int argc, char *argv[])
   {
     cout << "Node data array size " << NodeData.size() << endl;
     cout << "Child pointer array size " << ChildAddrs.size() << endl;
+    cout << "Max node childs " << MaxNumChilds << " (chars " << MaxChildChars << " ) at character index "
+         << MaxChildsPosn << " using password " << PassWithMaxChilds.c_str() << endl;
   }
   shared_ptr fout;
   ostream *Out = &cout;
diff --git a/test.c b/test.c
index f2e48d5..05dcbf7 100644
--- a/test.c
+++ b/test.c
@@ -124,6 +124,8 @@ int DoChecks(char *file)
     int y = 0;
     int w = 0;
     int r = 0;
+    int Less = 0;
+    int More = 0;
     FILE *f = fopen(file, "r");
     if (f == NULL)
     {
@@ -185,17 +187,28 @@ int DoChecks(char *file)
         e = ZxcvbnMatch(Pwd, UsrDict, 0);
         x = e / Ent;
         /* More than 1% difference is a fail. */
-        if ((x > 1.01) || (x < 1.0/1.01))
+        if (x > 1.01)
         {
-            printf("Line %2d Calculated entropy %5.2f, expected %5.2f <%s>\n", y, e, Ent, Pwd);
-            r = 1;
-            break;
+            ++More;
+            if (r < 10)
+            {
+                printf("Line %2d Calculated entropy %5.2f, expected %5.2f <%s>\n", y, e, Ent, Pwd);
+                ++r;
+            }
+        }
+        else if (x < 1.0/1.01)
+        {
+            ++Less;
+            if (r < 10)
+            {
+                printf("Line %2d Calculated entropy %5.2f, expected %5.2f <%s>\n", y, e, Ent, Pwd);
+                ++r;
+            }
         }
         ++w;
     }
     fclose(f);
-    if (!r)
-        printf("Tested %d words\n", w);
+    printf("Tested %d words, %d with low entropy, %d with high\n", w, Less, More);
     return r;
 }
 
diff --git a/zxcvbn.c b/zxcvbn.c
index 2e15680..7468586 100644
--- a/zxcvbn.c
+++ b/zxcvbn.c
@@ -491,9 +491,8 @@ typedef struct
     uint8_t Leeted[sizeof L33TChr];
     uint8_t UnLeet[sizeof L33TChr];
     uint8_t LeetCnv[sizeof L33TCnv / LEET_NORM_MAP_SIZE + 1];
-    /* uint8_t LeetChr[3]; */
     uint8_t First;
-    uint8_t PossChars[49];
+    uint8_t PossChars[CHARSET_SIZE];
 } DictWork_t;
 
 /**********************************************************************************
--
cgit v1.2.3
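
For illustration only, here is a minimal standalone C sketch of the sizing rule the patch
emits as CHARSET_SIZE: count the set bits in each node's child bitmap, keep the maximum,
and add one. The names BYTES_PER_ENTRY and MaxBitsSet, and the sample bitmaps, are
assumptions invented for this sketch; none of them appear in the patch or in dict-generate.cpp.

/* Standalone sketch (not from the patch): derive CHARSET_SIZE by counting
 * the maximum number of set bits in any node's child bitmap.  The bitmap
 * width and the sample bitmaps below are made up for illustration. */
#include <stdio.h>
#include <stdint.h>

#define BYTES_PER_ENTRY 8                /* assumed bitmap width per node */

static unsigned int MaxBitsSet(const uint8_t *Bitmaps, unsigned int NumNodes)
{
    unsigned int Max = 0, Node, Bit;
    for (Node = 0; Node < NumNodes; ++Node)
    {
        const uint8_t *Buf = Bitmaps + Node * BYTES_PER_ENTRY;
        unsigned int Count = 0;
        for (Bit = 0; Bit < 8 * BYTES_PER_ENTRY; ++Bit)
        {
            if (Buf[Bit / 8] & (1 << (Bit & 7)))
                ++Count;
        }
        if (Count > Max)
            Max = Count;
    }
    return Max;
}

int main(void)
{
    /* Two made-up child bitmaps: 3 set bits, then 5 set bits. */
    static const uint8_t Bitmaps[2 * BYTES_PER_ENTRY] =
    {
        0x07, 0, 0, 0, 0, 0, 0, 0,
        0x1F, 0, 0, 0, 0, 0, 0, 0
    };
    /* The +1 mirrors the (CharSetLen + 1) the patch uses when writing CHARSET_SIZE. */
    printf("#define CHARSET_SIZE %u\n", MaxBitsSet(Bitmaps, 2) + 1);
    return 0;
}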