about summary refs log tree commit diff homepage
path: root/debian/missing-sources/count_us_census.py
diff options
context:
space:
mode:
Diffstat (limited to 'debian/missing-sources/count_us_census.py')
-rwxr-xr-x debian/missing-sources/count_us_census.py 35
1 files changed, 35 insertions, 0 deletions
diff --git a/debian/missing-sources/count_us_census.py b/debian/missing-sources/count_us_census.py
new file mode 100755
index 0000000..1830ed9
--- /dev/null
+++ b/debian/missing-sources/count_us_census.py
@@ -0,0 +1,35 @@
+#!/usr/bin/python
+import sys
+import codecs
+
def usage():
    """Return the command-line help text for this script.

    The script's own invocation name (``sys.argv[0]``) is substituted into
    each of the three example command lines.
    """
    # The argument tuple must be parenthesised: ``%`` and ``*`` share the
    # same precedence and associate left to right, so without the
    # parentheses the formatting runs first with a single argument for
    # three ``%s`` placeholders and raises TypeError.
    return '''
This script converts surname/name data from the US 1990 census into a format zxcvbn
recognizes. To use, first obtain the census files:

http://www2.census.gov/topics/genealogy/1990surnames

download dist.all.last, dist.female.first and dist.male.first

Then run:

%s dist.all.last ../data/surnames.txt
%s dist.female.first ../data/female_names.txt
%s dist.male.first ../data/male_names.txt

for each file.
''' % ((sys.argv[0],) * 3)
+
def main(input_filename, output_filename):
    """Write the lowercased first column of a census file, one per line.

    Reads ``input_filename`` as UTF-8. For every line that is not blank,
    the first whitespace-separated token is lowercased and written to
    ``output_filename`` (UTF-8, overwritten if it exists) followed by a
    newline.
    """
    # Open the input first so a bad input path does not truncate an
    # existing output file; the ``with`` blocks close both files even if
    # an error occurs part-way through.
    with codecs.open(input_filename, 'r', 'utf8') as source:
        with codecs.open(output_filename, 'w', 'utf8') as f:
            for line in source:
                fields = line.split()
                if fields:
                    f.write(fields[0].lower() + '\n')
+
if __name__ == '__main__':
    # Expect exactly two arguments: the census input file and the output file.
    if len(sys.argv) != 3:
        # Parenthesised so the line parses under both Python 2 and Python 3;
        # with a single argument the printed output is the same on either.
        print(usage())
    else:
        main(*sys.argv[1:])
    sys.exit(0)