diff options
Diffstat (limited to 'debian/missing-sources/count_us_census.py')
-rwxr-xr-x | debian/missing-sources/count_us_census.py | 35 |
1 files changed, 35 insertions, 0 deletions
#!/usr/bin/python
# Reconstructed from the "new file" diff of
# debian/missing-sources/count_us_census.py (mode 100755).
"""Convert US 1990 census name lists into one lowercased name per line."""
import sys
import codecs


def usage():
    """Return the command-line help text with the script name filled in."""
    # The argument tuple must be parenthesised: `%` and `*` share the same
    # precedence and associate left to right, so `text % [name] * 3` would
    # try to format first and fail (a list counts as a single argument for
    # the three `%s` placeholders).
    return '''
This script converts surname/name data from the US 1990 census into a format zxcvbn
recognizes. To use, first obtain the census files:

http://www2.census.gov/topics/genealogy/1990surnames

download dist.all.last, dist.female.first and dist.male.first

Then run:

%s dist.all.last ../data/surnames.txt
%s dist.female.first ../data/female_names.txt
%s dist.male.first ../data/male_names.txt

for each file.
''' % ((sys.argv[0],) * 3)


def main(input_filename, output_filename):
    """Write the lowercased first field of every non-blank input line.

    Both files are treated as UTF-8. Each input line is split on
    whitespace; its first token is lowercased and written to the output
    followed by a newline. Lines containing only whitespace are skipped.

    Args:
        input_filename: path of the file to read.
        output_filename: path of the file to create or overwrite.
    """
    # Open the input first so that a missing or unreadable input does not
    # leave behind a freshly truncated output file.
    with codecs.open(input_filename, 'r', 'utf8') as source:
        with codecs.open(output_filename, 'w', 'utf8') as f:
            for line in source:
                fields = line.split()
                if fields:
                    f.write(fields[0].lower() + '\n')


if __name__ == '__main__':
    if len(sys.argv) != 3:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(usage())
    else:
        main(*sys.argv[1:])
    sys.exit(0)