[Python-modules-commits] [python-zxcvbn] 08/08: new upstream version

Sabino Par sprab-guest at moszumanska.debian.org
Sat May 6 17:48:08 UTC 2017


This is an automated email from the git hooks/post-receive script.

sprab-guest pushed a commit to branch master
in repository python-zxcvbn.

commit b103f11e5282096ca950871e582030150ab4d6d1
Author: sprab-guest <sprab at onenetbeyond.org>
Date:   Wed Apr 19 23:04:23 2017 +0200

    new upstream version
---
 zxcvbn/__init__.py         |  34 ++-
 zxcvbn/adjacency_graphs.py |   7 +
 zxcvbn/scoring.py          | 613 +++++++++++++++++++++++++--------------------
 zxcvbn/time_estimates.py   |  77 ++++++
 4 files changed, 449 insertions(+), 282 deletions(-)

diff --git a/zxcvbn/__init__.py b/zxcvbn/__init__.py
index 7b444a4..11964eb 100644
--- a/zxcvbn/__init__.py
+++ b/zxcvbn/__init__.py
@@ -1,18 +1,26 @@
-from zxcvbn import main
+from datetime import datetime
 
-__all__ = ['password_strength']
+from . import matching, scoring, time_estimates, feedback
 
-password_strength = main.password_strength
 
+def zxcvbn(password, user_inputs=None):
+    if user_inputs is None:
+        user_inputs = []
 
-if __name__ == '__main__':
-    import fileinput
-    ignored = ('match_sequence', 'password')
+    start = datetime.now()
 
-    for line in fileinput.input():
-        pw = line.strip()
-        print "Password: " + pw
-        out = password_strength(pw)
-        for key, value in out.iteritems():
-            if key not in ignored:
-                print "\t%s: %s" % (key, value)
+    sanitized_inputs = [str(arg).lower() for arg in user_inputs]
+    matching.set_user_input_dictionary(sanitized_inputs)
+
+    matches = matching.omnimatch(password)
+    result = scoring.most_guessable_match_sequence(password, matches)
+    result['calc_time'] = datetime.now() - start
+
+    attack_times = time_estimates.estimate_attack_times(result['guesses'])
+    for prop, val in attack_times.items():
+        result[prop] = val
+
+    result['feedback'] = feedback.get_feedback(result['score'],
+                                               result['sequence'])
+
+    return result
diff --git a/zxcvbn/adjacency_graphs.py b/zxcvbn/adjacency_graphs.py
new file mode 100644
index 0000000..6fd08d2
--- /dev/null
+++ b/zxcvbn/adjacency_graphs.py
@@ -0,0 +1,7 @@
+# generated by scripts/build_keyboard_adjacency_graphs.py
+ADJACENCY_GRAPHS = {
+    "qwerty": {"!": ["`~", None, None, "2@", "qQ", None], "\"": [";:", "[{", "]}", None, None, "/?"], "#": ["2@", None, None, "4$", "eE", "wW"], "$": ["3#", None, None, "5%", "rR", "eE"], "%": ["4$", None, None, "6^", "tT", "rR"], "&": ["6^", None, None, "8*", "uU", "yY"], "'": [";:", "[{", "]}", None, None, "/?"], "(": ["8*", None, None, "0)", "oO", "iI"], ")": ["9(", None, None, "-_", "pP", "oO"], "*": ["7&", None, None, "9(", "iI", "uU"], "+": ["-_", None, None, None, "]}", "[{"], "," [...]
+    "dvorak": {"!": ["`~", None, None, "2@", "'\"", None], "\"": [None, "1!", "2@", ",<", "aA", None], "#": ["2@", None, None, "4$", ".>", ",<"], "$": ["3#", None, None, "5%", "pP", ".>"], "%": ["4$", None, None, "6^", "yY", "pP"], "&": ["6^", None, None, "8*", "gG", "fF"], "'": [None, "1!", "2@", ",<", "aA", None], "(": ["8*", None, None, "0)", "rR", "cC"], ")": ["9(", None, None, "[{", "lL", "rR"], "*": ["7&", None, None, "9(", "cC", "gG"], "+": ["/?", "]}", None, "\\|", None, "-_"], " [...]
+    "keypad": {"*": ["/", None, None, None, "-", "+", "9", "8"], "+": ["9", "*", "-", None, None, None, None, "6"], "-": ["*", None, None, None, None, None, "+", "9"], ".": ["0", "2", "3", None, None, None, None, None], "/": [None, None, None, None, "*", "9", "8", "7"], "0": [None, "1", "2", "3", ".", None, None, None], "1": [None, None, "4", "5", "2", "0", None, None], "2": ["1", "4", "5", "6", "3", ".", "0", None], "3": ["2", "5", "6", None, None, None, ".", "0"], "4": [None, None, "7" [...]
+    "mac_keypad": {"*": ["/", None, None, None, None, None, "-", "9"], "+": ["6", "9", "-", None, None, None, None, "3"], "-": ["9", "/", "*", None, None, None, "+", "6"], ".": ["0", "2", "3", None, None, None, None, None], "/": ["=", None, None, None, "*", "-", "9", "8"], "0": [None, "1", "2", "3", ".", None, None, None], "1": [None, None, "4", "5", "2", "0", None, None], "2": ["1", "4", "5", "6", "3", ".", "0", None], "3": ["2", "5", "6", "+", None, None, ".", "0"], "4": [None, None, " [...]
+}
\ No newline at end of file
diff --git a/zxcvbn/scoring.py b/zxcvbn/scoring.py
index 8903f53..250ab49 100644
--- a/zxcvbn/scoring.py
+++ b/zxcvbn/scoring.py
@@ -1,343 +1,418 @@
-import math
+from math import log, factorial
+
 import re
 
-from zxcvbn.matching import (KEYBOARD_STARTING_POSITIONS, KEYBOARD_AVERAGE_DEGREE,
-                             KEYPAD_STARTING_POSITIONS, KEYPAD_AVERAGE_DEGREE)
+from .adjacency_graphs import ADJACENCY_GRAPHS
+
+
+def calc_average_degree(graph):
+    average = 0
+
+    for key, neighbors in graph.items():
+        average += len([n for n in neighbors if n])
+    average /= float(len(graph.items()))
+
+    return average
+
+
+BRUTEFORCE_CARDINALITY = 10
+MIN_GUESSES_BEFORE_GROWING_SEQUENCE = 10000
+MIN_SUBMATCH_GUESSES_SINGLE_CHAR = 10
+MIN_SUBMATCH_GUESSES_MULTI_CHAR = 50
+
+MIN_YEAR_SPACE = 20
+REFERENCE_YEAR = 2017
 
-def binom(n, k):
-    """
-    Returns binomial coefficient (n choose k).
-    """
-    # http://blog.plover.com/math/choose.html
+
+def nCk(n, k):
+    """http://blog.plover.com/math/choose.html"""
     if k > n:
         return 0
     if k == 0:
         return 1
-    result = 1
-    for denom in range(1, k + 1):
-        result *= n
-        result /= denom
+
+    r = 1
+    for d in range(1, k + 1):
+        r *= n
+        r /= d
         n -= 1
-    return result
 
+    return r
 
-def lg(n):
-    """
-    Returns logarithm of n in base 2.
-    """
-    return math.log(n, 2)
 
 # ------------------------------------------------------------------------------
-# minimum entropy search -------------------------------------------------------
+# search --- most guessable match sequence -------------------------------------
 # ------------------------------------------------------------------------------
 #
-# takes a list of overlapping matches, returns the non-overlapping sublist with
-# minimum entropy. O(nm) dp alg for length-n password with m candidate matches.
+# takes a sequence of overlapping matches, returns the non-overlapping sequence with
+# minimum guesses. the following is an O(l_max * (n + m)) dynamic programming algorithm
+# for a length-n password with m candidate matches. l_max is the maximum optimal
+# sequence length spanning each prefix of the password. In practice it rarely exceeds 5 and the
+# search terminates rapidly.
+#
+# the optimal "minimum guesses" sequence is here defined to be the sequence that
+# minimizes the following function:
+#
+#    g = l! * Product(m.guesses for m in sequence) + D^(l - 1)
+#
+# where l is the length of the sequence.
+#
+# the factorial term is the number of ways to order l patterns.
+#
+# the D^(l-1) term is another length penalty, roughly capturing the idea that an
+# attacker will try lower-length sequences first before trying length-l sequences.
+#
+# for example, consider a sequence that is date-repeat-dictionary.
+#  - an attacker would need to try other date-repeat-dictionary combinations,
+#    hence the product term.
+#  - an attacker would need to try repeat-date-dictionary, dictionary-repeat-date,
+#    ..., hence the factorial term.
+#  - an attacker would also likely try length-1 (dictionary) and length-2 (dictionary-date)
+#    sequences before length-3. assuming at minimum D guesses per pattern type,
+#    D^(l-1) approximates Sum(D^i for i in [1..l-1])
+#
 # ------------------------------------------------------------------------------
-def get(a, i):
-    if i < 0 or i >= len(a):
-        return 0
-    return a[i]
-
-
-def minimum_entropy_match_sequence(password, matches):
-    """
-    Returns minimum entropy
-
-    Takes a list of overlapping matches, returns the non-overlapping sublist with
-    minimum entropy. O(nm) dp alg for length-n password with m candidate matches.
-    """
-    bruteforce_cardinality = calc_bruteforce_cardinality(password) # e.g. 26 for lowercase
-    up_to_k = [0] * len(password) # minimum entropy up to k.
-    # for the optimal sequence of matches up to k, holds the final match (match['j'] == k). null means the sequence ends
-    # without a brute-force character.
-    backpointers = []
-    for k in range(0, len(password)):
-        # starting scenario to try and beat: adding a brute-force character to the minimum entropy sequence at k-1.
-        up_to_k[k] = get(up_to_k, k-1) + lg(bruteforce_cardinality)
-        backpointers.append(None)
-        for match in matches:
-            if match['j'] != k:
-                continue
-            i, j = match['i'], match['j']
-            # see if best entropy up to i-1 + entropy of this match is less than the current minimum at j.
-            up_to = get(up_to_k, i-1)
-            candidate_entropy = up_to + calc_entropy(match)
-            if candidate_entropy < up_to_k[j]:
-                #print "New minimum: using " + str(match)
-                #print "Entropy: " + str(candidate_entropy)
-                up_to_k[j] = candidate_entropy
-                backpointers[j] = match
-
-    # walk backwards and decode the best sequence
-    match_sequence = []
-    k = len(password) - 1
-    while k >= 0:
-        match = backpointers[k]
-        if match:
-            match_sequence.append(match)
-            k = match['i'] - 1
-        else:
-            k -= 1
-    match_sequence.reverse()
+def most_guessable_match_sequence(password, matches, _exclude_additive=False):
+    n = len(password)
+
+    # partition matches into sublists according to ending index j
+    matches_by_j = [[] for _ in range(n)]
+    try:
+        for m in matches:
+            matches_by_j[m['j']].append(m)
+    except TypeError:
+        pass
+    # small detail: for deterministic output, sort each sublist by i.
+    for lst in matches_by_j:
+        lst.sort(key=lambda m1: m1['i'])
+
+    optimal = {
+        # optimal.m[k][l] holds final match in the best length-l match sequence
+        # covering the password prefix up to k, inclusive.
+        # if there is no length-l sequence that scores better (fewer guesses)
+        # than a shorter match sequence spanning the same prefix,
+        # optimal.m[k][l] is undefined.
+        'm': [{} for _ in range(n)],
+
+        # same structure as optimal.m -- holds the product term Prod(m.guesses
+        # for m in sequence). optimal.pi allows for fast (non-looping) updates
+        # to the minimization function.
+        'pi': [{} for _ in range(n)],
+
+        # same structure as optimal.m -- holds the overall metric.
+        'g': [{} for _ in range(n)],
+    }
 
-    # fill in the blanks between pattern matches with bruteforce "matches"
-    # that way the match sequence fully covers the password: match1.j == match2.i - 1 for every adjacent match1, match2.
+    # helper: considers whether a length-l sequence ending at match m is better
+    # (fewer guesses) than previously encountered sequences, updating state if
+    # so.
+    def update(m, l):
+        k = m['j']
+        pi = estimate_guesses(m, password)
+        if l > 1:
+            # we're considering a length-l sequence ending with match m:
+            # obtain the product term in the minimization function by
+            # multiplying m's guesses by the product of the length-(l-1)
+            # sequence ending just before m, at m.i - 1.
+            pi *= optimal['pi'][m['i'] - 1][l - 1]
+        # calculate the minimization func
+        g = factorial(l) * pi
+        if not _exclude_additive:
+            g += MIN_GUESSES_BEFORE_GROWING_SEQUENCE ** (l - 1)
+
+        # update state if new best.
+        # first see if any competing sequences covering this prefix, with l or
+        # fewer matches, fare better than this sequence. if so, skip it and
+        # return.
+        for competing_l, competing_g in optimal['g'][k].items():
+            if competing_l > l:
+                continue
+            if competing_g <= g:
+                return
+
+        # this sequence might be part of the final optimal sequence.
+        optimal['g'][k][l] = g
+        optimal['m'][k][l] = m
+        optimal['pi'][k][l] = pi
+
+    # helper: evaluate bruteforce matches ending at k.
+    def bruteforce_update(k):
+        # see if a single bruteforce match spanning the k-prefix is optimal.
+        m = make_bruteforce_match(0, k)
+        update(m, 1)
+        for i in range(1, k):
+            # generate k bruteforce matches, spanning from (i=1, j=k) up to
+            # (i=k, j=k). see if adding these new matches to any of the
+            # sequences in optimal[i-1] leads to new bests.
+            m = make_bruteforce_match(i, k)
+            for l, last_m in optimal['m'][i - 1].items():
+                l = int(l)
+
+                # corner: an optimal sequence will never have two adjacent
+                # bruteforce matches. it is strictly better to have a single
+                # bruteforce match spanning the same region: same contribution
+                # to the guess product with a lower length.
+                # --> safe to skip those cases.
+                if last_m.get('pattern', False) == 'bruteforce':
+                    continue
+
+                # try adding m to this length-l sequence.
+                update(m, l + 1)
+
+    # helper: make bruteforce match objects spanning i to j, inclusive.
     def make_bruteforce_match(i, j):
         return {
             'pattern': 'bruteforce',
+            'token': password[i:j + 1],
             'i': i,
             'j': j,
-            'token': password[i:j+1],
-            'entropy': lg(math.pow(bruteforce_cardinality, j - i + 1)),
-            'cardinality': bruteforce_cardinality,
         }
-    k = 0
-    match_sequence_copy = []
-    for match in match_sequence:
-        i, j = match['i'], match['j']
-        if i - k > 0:
-            match_sequence_copy.append(make_bruteforce_match(k, i - 1))
-        k = j + 1
-        match_sequence_copy.append(match)
-
-    if k < len(password):
-        match_sequence_copy.append(make_bruteforce_match(k, len(password) - 1))
-    match_sequence = match_sequence_copy
-
-    min_entropy = 0 if len(password) == 0 else up_to_k[len(password) - 1] # corner case is for an empty password ''
-    crack_time = entropy_to_crack_time(min_entropy)
+
+    # helper: step backwards through optimal.m starting at the end,
+    # constructing the final optimal match sequence.
+    def unwind(n):
+        optimal_match_sequence = []
+        k = n - 1
+        # find the final best sequence length and score
+        l = None
+        g = float('inf')
+        for candidate_l, candidate_g in optimal['g'][k].items():
+            if candidate_g < g:
+                l = candidate_l
+                g = candidate_g
+
+        while k >= 0:
+            m = optimal['m'][k][l]
+            optimal_match_sequence.insert(0, m)
+            k = m['i'] - 1
+            l -= 1
+
+        return optimal_match_sequence
+
+    for k in range(n):
+        for m in matches_by_j[k]:
+            if m['i'] > 0:
+                for l in optimal['m'][m['i'] - 1]:
+                    l = int(l)
+                    update(m, l + 1)
+            else:
+                update(m, 1)
+        bruteforce_update(k)
+
+    optimal_match_sequence = unwind(n)
+    optimal_l = len(optimal_match_sequence)
+
+    # corner: empty password
+    if len(password) == 0:
+        guesses = 1
+    else:
+        guesses = optimal['g'][n - 1][optimal_l]
 
     # final result object
     return {
         'password': password,
-        'entropy': round_to_x_digits(min_entropy, 3),
-        'match_sequence': match_sequence,
-        'crack_time': round_to_x_digits(crack_time, 3),
-        'crack_time_display': display_time(crack_time),
-        'score': crack_time_to_score(crack_time),
+        'guesses': guesses,
+        'guesses_log10': log(guesses, 10),
+        'sequence': optimal_match_sequence,
     }
 
 
-def round_to_x_digits(number, digits):
-    """
-    Returns 'number' rounded to 'digits' digits.
-    """
-    return round(number * math.pow(10, digits)) / math.pow(10, digits)
+def estimate_guesses(match, password):
+    if match.get('guesses', False):
+        return match['guesses']
 
-# ------------------------------------------------------------------------------
-# threat model -- stolen hash catastrophe scenario -----------------------------
-# ------------------------------------------------------------------------------
-#
-# assumes:
-# * passwords are stored as salted hashes, different random salt per user.
-#   (making rainbow attacks infeasable.)
-# * hashes and salts were stolen. attacker is guessing passwords at max rate.
-# * attacker has several CPUs at their disposal.
-# ------------------------------------------------------------------------------
+    min_guesses = 1
+    if len(match['token']) < len(password):
+        if len(match['token']) == 1:
+            min_guesses = MIN_SUBMATCH_GUESSES_SINGLE_CHAR
+        else:
+            min_guesses = MIN_SUBMATCH_GUESSES_MULTI_CHAR
+
+    estimation_functions = {
+        'bruteforce': bruteforce_guesses,
+        'dictionary': dictionary_guesses,
+        'spatial': spatial_guesses,
+        'repeat': repeat_guesses,
+        'sequence': sequence_guesses,
+        'regex': regex_guesses,
+        'date': date_guesses,
+    }
 
-# for a hash function like bcrypt/scrypt/PBKDF2, 10ms per guess is a safe lower bound.
-# (usually a guess would take longer -- this assumes fast hardware and a small work factor.)
-# adjust for your site accordingly if you use another hash function, possibly by
-# several orders of magnitude!
-SINGLE_GUESS = .010
-NUM_ATTACKERS = 100 # number of cores guessing in parallel.
+    guesses = estimation_functions[match['pattern']](match)
+    match['guesses'] = max(guesses, min_guesses)
+    match['guesses_log10'] = log(match['guesses'], 10)
 
-SECONDS_PER_GUESS = SINGLE_GUESS / NUM_ATTACKERS
+    return match['guesses']
 
 
-def entropy_to_crack_time(entropy):
-    return (0.5 * math.pow(2, entropy)) * SECONDS_PER_GUESS # average, not total
+def bruteforce_guesses(match):
+    guesses = BRUTEFORCE_CARDINALITY ** len(match['token'])
+    # small detail: make bruteforce matches at minimum one guess bigger than
+    # smallest allowed submatch guesses, such that non-bruteforce submatches
+    # over the same [i..j] take precedence.
+    if len(match['token']) == 1:
+        min_guesses = MIN_SUBMATCH_GUESSES_SINGLE_CHAR + 1
+    else:
+        min_guesses = MIN_SUBMATCH_GUESSES_MULTI_CHAR + 1
 
+    return max(guesses, min_guesses)
 
-def crack_time_to_score(seconds):
-    if seconds < math.pow(10, 2):
-        return 0
-    if seconds < math.pow(10, 4):
-        return 1
-    if seconds < math.pow(10, 6):
-        return 2
-    if seconds < math.pow(10, 8):
-        return 3
-    return 4
 
-# ------------------------------------------------------------------------------
-# entropy calcs -- one function per match pattern ------------------------------
-# ------------------------------------------------------------------------------
+def dictionary_guesses(match):
+    # keep these as properties for display purposes
+    match['base_guesses'] = match['rank']
+    match['uppercase_variations'] = uppercase_variations(match)
+    match['l33t_variations'] = l33t_variations(match)
+    reversed_variations = match.get('reversed', False) and 2 or 1
+
+    return match['base_guesses'] * match['uppercase_variations'] * \
+           match['l33t_variations'] * reversed_variations
+
 
-def calc_entropy(match):
-    if 'entropy' in match: return match['entropy']
-
-    if match['pattern'] == 'repeat':
-        entropy_func = repeat_entropy
-    elif match['pattern'] == 'sequence':
-        entropy_func = sequence_entropy
-    elif match['pattern'] == 'digits':
-        entropy_func = digits_entropy
-    elif match['pattern'] == 'year':
-        entropy_func = year_entropy
-    elif match['pattern'] == 'date':
-        entropy_func = date_entropy
-    elif match['pattern'] == 'spatial':
-        entropy_func = spatial_entropy
-    elif match['pattern'] == 'dictionary':
-        entropy_func = dictionary_entropy
-    match['entropy'] = entropy_func(match)
-    return match['entropy']
-
-
-def repeat_entropy(match):
-    cardinality = calc_bruteforce_cardinality(match['token'])
-    return lg(cardinality * len(match['token']))
-
-
-def sequence_entropy(match):
-    first_chr = match['token'][0]
-    if first_chr in ['a', '1']:
-        base_entropy = 1
+def repeat_guesses(match):
+    return match['base_guesses'] * match['repeat_count']
+
+
+def sequence_guesses(match):
+    first_chr = match['token'][:1]
+    # lower guesses for obvious starting points
+    if first_chr in ['a', 'A', 'z', 'Z', '0', '1', '9']:
+        base_guesses = 4
     else:
-        if first_chr.isdigit():
-            base_entropy = lg(10) # digits
-        elif first_chr.isalpha():
-            base_entropy = lg(26) # lower
+        if re.compile('\d').match(first_chr):
+            base_guesses = 10  # digits
         else:
-            base_entropy = lg(26) + 1 # extra bit for uppercase
+            # could give a higher base for uppercase,
+            # assigning 26 to both upper and lower sequences is more
+            # conservative.
+            base_guesses = 26
     if not match['ascending']:
-        base_entropy += 1 # extra bit for descending instead of ascending
-    return base_entropy + lg(len(match['token']))
+        base_guesses *= 2
 
+    return base_guesses * len(match['token'])
 
-def digits_entropy(match):
-    return lg(math.pow(10, len(match['token'])))
 
+def regex_guesses(match):
+    char_class_bases = {
+        'alpha_lower': 26,
+        'alpha_upper': 26,
+        'alpha': 52,
+        'alphanumeric': 62,
+        'digits': 10,
+        'symbols': 33,
+    }
+    if match['regex_name'] in char_class_bases:
+        return char_class_bases[match['regex_name']] ** len(match['token'])
+    elif match['regex_name'] == 'recent_year':
+        # conservative estimate of year space: num years from REFERENCE_YEAR.
+        # if year is close to REFERENCE_YEAR, estimate a year space of
+        # MIN_YEAR_SPACE.
+        year_space = abs(int(match['regex_match'].group(0)) - REFERENCE_YEAR)
+        year_space = max(year_space, MIN_YEAR_SPACE)
 
-NUM_YEARS = 119 # years match against 1900 - 2019
-NUM_MONTHS = 12
-NUM_DAYS = 31
+        return year_space
 
 
-def year_entropy(match):
-    return lg(NUM_YEARS)
+def date_guesses(match):
+    year_space = max(abs(match['year'] - REFERENCE_YEAR), MIN_YEAR_SPACE)
+    guesses = year_space * 365
+    if match.get('separator', False):
+        guesses *= 4
 
+    return guesses
 
-def date_entropy(match):
-    if match['year'] < 100:
-        entropy = lg(NUM_DAYS * NUM_MONTHS * 100) # two-digit year
-    else:
-        entropy = lg(NUM_DAYS * NUM_MONTHS * NUM_YEARS) # four-digit year
 
-    if match['separator']:
-        entropy += 2 # add two bits for separator selection [/,-,.,etc]
-    return entropy
+KEYBOARD_AVERAGE_DEGREE = calc_average_degree(ADJACENCY_GRAPHS['qwerty'])
+# slightly different for keypad/mac keypad, but close enough
+KEYPAD_AVERAGE_DEGREE = calc_average_degree(ADJACENCY_GRAPHS['keypad'])
 
+KEYBOARD_STARTING_POSITIONS = len(ADJACENCY_GRAPHS['qwerty'].keys())
+KEYPAD_STARTING_POSITIONS = len(ADJACENCY_GRAPHS['keypad'].keys())
 
-def spatial_entropy(match):
+
+def spatial_guesses(match):
     if match['graph'] in ['qwerty', 'dvorak']:
         s = KEYBOARD_STARTING_POSITIONS
         d = KEYBOARD_AVERAGE_DEGREE
     else:
         s = KEYPAD_STARTING_POSITIONS
         d = KEYPAD_AVERAGE_DEGREE
-    possibilities = 0
+    guesses = 0
     L = len(match['token'])
     t = match['turns']
-    # estimate the number of possible patterns w/ length L or less with t turns or less.
+    # estimate the number of possible patterns w/ length L or less with t turns
+    # or less.
     for i in range(2, L + 1):
-        possible_turns = min(t, i - 1)
-        for j in range(1, possible_turns+1):
-            x =  binom(i - 1, j - 1) * s * math.pow(d, j)
-            possibilities += x
-    entropy = lg(possibilities)
-    # add extra entropy for shifted keys. (% instead of 5, A instead of a.)
-    # math is similar to extra entropy from uppercase letters in dictionary matches.
-    if 'shifted_count' in match:
+        possible_turns = min(t, i - 1) + 1
+        for j in range(1, possible_turns):
+            guesses += nCk(i - 1, j - 1) * s * pow(d, j)
+    # add extra guesses for shifted keys. (% instead of 5, A instead of a.)
+    # math is similar to extra guesses of l33t substitutions in dictionary
+    # matches.
+    if match['shifted_count']:
         S = match['shifted_count']
-        U = L - S # unshifted count
-        possibilities = sum(binom(S + U, i) for i in xrange(0, min(S, U) + 1))
-        entropy += lg(possibilities)
-    return entropy
-
+        U = len(match['token']) - match['shifted_count']  # unshifted count
+        if S == 0 or U == 0:
+            guesses *= 2
+        else:
+            shifted_variations = 0
+            for i in range(1, min(S, U) + 1):
+                shifted_variations += nCk(S + U, i)
+            guesses *= shifted_variations
 
-def dictionary_entropy(match):
-    match['base_entropy'] = lg(match['rank']) # keep these as properties for display purposes
-    match['uppercase_entropy'] = extra_uppercase_entropy(match)
-    match['l33t_entropy'] = extra_l33t_entropy(match)
-    ret = match['base_entropy'] + match['uppercase_entropy'] + match['l33t_entropy']
-    return ret
+    return guesses
 
 
 START_UPPER = re.compile('^[A-Z][^A-Z]+$')
 END_UPPER = re.compile('^[^A-Z]+[A-Z]$')
-ALL_UPPER = re.compile('^[A-Z]+$')
+ALL_UPPER = re.compile('^[^a-z]+$')
+ALL_LOWER = re.compile('^[^A-Z]+$')
 
 
-def extra_uppercase_entropy(match):
+def uppercase_variations(match):
     word = match['token']
-    if word.islower():
-        return 0
-    # a capitalized word is the most common capitalization scheme,
-    # so it only doubles the search space (uncapitalized + capitalized): 1 extra bit of entropy.
-    # allcaps and end-capitalized are common enough too, underestimate as 1 extra bit to be safe.
+
+    if ALL_LOWER.match(word) or word.lower() == word:
+        return 1
+
     for regex in [START_UPPER, END_UPPER, ALL_UPPER]:
         if regex.match(word):
-            return 1
-    # Otherwise calculate the number of ways to capitalize U+L uppercase+lowercase letters with U uppercase letters or
-    # less. Or, if there's more uppercase than lower (for e.g. PASSwORD), the number of ways to lowercase U+L letters
-    # with L lowercase letters or less.
-    upp_len = len([x for x in word if x.isupper()])
-    low_len = len([x for x in word if x.islower()])
-    possibilities = sum(binom(upp_len + low_len, i) for i in range(0, min(upp_len, low_len) + 1))
-    return lg(possibilities)
-
-
-def extra_l33t_entropy(match):
-    if 'l33t' not in match or not match['l33t']:
-        return 0
-    possibilities = 0
-    for subbed, unsubbed in match['sub'].items():
-        sub_len = len([x for x in match['token'] if x == subbed])
-        unsub_len = len([x for x in match['token'] if x == unsubbed])
-        possibilities += sum(binom(unsub_len + sub_len, i) for i in range(0, min(unsub_len, sub_len) + 1))
-    # corner: return 1 bit for single-letter subs, like 4pple -> apple, instead of 0.
-    if possibilities <= 1:
+            return 2
+
+    U = sum(1 for c in word if c.isupper())
+    L = sum(1 for c in word if c.islower())
+    variations = 0
+    for i in range(1, min(U, L) + 1):
+        variations += nCk(U + L, i)
+
+    return variations
+
+
+def l33t_variations(match):
+    if not match.get('l33t', False):
         return 1
-    return lg(possibilities)
-
-# utilities --------------------------------------------------------------------
-
-def calc_bruteforce_cardinality(password):
-    lower, upper, digits, symbols = 0, 0, 0, 0
-    for char in password:
-        if char.islower():
-            lower = 26
-        elif char.isdigit():
-            digits = 10
-        elif char.isupper():
-            upper = 26
+
+    variations = 1
+
+    for subbed, unsubbed in match['sub'].items():
+        # lower-case match.token before calculating: capitalization shouldn't
+        # affect l33t calc.
+        chrs = list(match['token'].lower())
+        S = sum(1 for chr in chrs if chr == subbed)
+        U = sum(1 for chr in chrs if chr == unsubbed)
+        if S == 0 or U == 0:
+            # for this sub, password is either fully subbed (444) or fully
+            # unsubbed (aaa); treat that as doubling the space (attacker needs
+            # to try fully subbed chars in addition to unsubbed).
+            variations *= 2
         else:
-            symbols = 33
-    cardinality = lower + digits + upper + symbols
-    return cardinality
-
-
-def display_time(seconds):
-    minute = 60
-    hour = minute * 60
-    day = hour * 24
-    month = day * 31
-    year = month * 12
-    century = year * 100
-    if seconds < minute:
-        return 'instant'
-    elif seconds < hour:
-        return str(1 + math.ceil(seconds / minute)) + " minutes"
-    elif seconds < day:
-        return str(1 + math.ceil(seconds / hour)) + " hours"
-    elif seconds < month:
-        return str(1 + math.ceil(seconds / day)) + " days"
-    elif seconds < year:
-        return str(1 + math.ceil(seconds / month)) + " months"
-    elif seconds < century:
-        return str(1 + math.ceil(seconds / year)) + " years"
-    else:
-        return 'centuries'
+            # this case is similar to capitalization:
+            # with aa44a, U = 3, S = 2, attacker needs to try unsubbed + one
+            # sub + two subs
+            p = min(U, S)
+            possibilities = 0
+            for i in range(1, p + 1):
+                possibilities += nCk(U + S, i)
+            variations *= possibilities
+
+    return variations
diff --git a/zxcvbn/time_estimates.py b/zxcvbn/time_estimates.py
new file mode 100644
index 0000000..e2d2f26
--- /dev/null
+++ b/zxcvbn/time_estimates.py
@@ -0,0 +1,77 @@
+def estimate_attack_times(guesses):
+    crack_times_seconds = {
+        'online_throttling_100_per_hour': float(guesses) / (100.0 / 3600.0),
+        'online_no_throttling_10_per_second': float(guesses) / 10.0,
+        'offline_slow_hashing_1e4_per_second': float(guesses) / float(1e4),
+        'offline_fast_hashing_1e10_per_second': float(guesses) / float(1e10),
+    }
+
+    crack_times_display = {}
+    for scenario, seconds in crack_times_seconds.items():
+        crack_times_display[scenario] = display_time(seconds)
+
+    return {
+        'crack_times_seconds': crack_times_seconds,
+        'crack_times_display': crack_times_display,
+        'score': guesses_to_score(guesses),
+    }
+
+
+def guesses_to_score(guesses):
+    delta = 5
+
+    if guesses < 1e3 + delta:
+        # risky password: "too guessable"
+        return 0
+    elif guesses < 1e6 + delta:
+        # modest protection from throttled online attacks: "very guessable"
+        return 1
+    elif guesses < 1e8 + delta:
+        # modest protection from unthrottled online attacks: "somewhat
+        # guessable"
+        return 2
+    elif guesses < 1e10 + delta:
+        # modest protection from offline attacks: "safely unguessable"
+        # assuming a salted, slow hash function like bcrypt, scrypt, PBKDF2,
+        # argon, etc
+        return 3
+    else:
+        # strong protection from offline attacks under same scenario: "very
+        # unguessable"
+        return 4
+
+
+def display_time(seconds):
+    minute = 60
+    hour = minute * 60
+    day = hour * 24
+    month = day * 31
+    year = month * 12
+    century = year * 100
+    if seconds < 1:
+        display_num, display_str = None, 'less than a second'
+    elif seconds < minute:
+        base = round(seconds)
+        display_num, display_str = base, '%s second' % base
+    elif seconds < hour:
+        base = round(seconds / minute)
+        display_num, display_str = base, '%s minute' % base
+    elif seconds < day:
+        base = round(seconds / hour)
+        display_num, display_str = base, '%s hour' % base
+    elif seconds < month:
+        base = round(seconds / day)
+        display_num, display_str = base, '%s day' % base
+    elif seconds < year:
+        base = round(seconds / month)
+        display_num, display_str = base, '%s month' % base
+    elif seconds < century:
+        base = round(seconds / year)
+        display_num, display_str = base, '%s year' % base
+    else:
+        display_num, display_str = None, 'centuries'
+
+    if display_num and display_num != 1:
+        display_str += 's'
+
+    return display_str

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-zxcvbn.git



More information about the Python-modules-commits mailing list