416 lines
14 KiB
Python

"""passlib.pbkdf2 - PBKDF2 support
this module is getting increasingly poorly named.
maybe rename to "kdf" since it's getting more key derivation functions added.
"""
#=============================================================================
# imports
#=============================================================================
# core
import hashlib
import logging; log = logging.getLogger(__name__)
import re
from struct import pack
from warnings import warn
# site
try:
from M2Crypto import EVP as _EVP
except ImportError:
_EVP = None
# pkg
from passlib.exc import PasslibRuntimeWarning, ExpectedTypeError
from passlib.utils import join_bytes, to_native_str, bytes_to_int, int_to_bytes, join_byte_values
from passlib.utils.compat import b, bytes, BytesIO, irange, callable, int_types
# local
__all__ = [
"get_prf",
"pbkdf1",
"pbkdf2",
]
#=============================================================================
# hash helpers
#=============================================================================
# known hash names
_nhn_formats = dict(hashlib=0, iana=1)
_nhn_hash_names = [
# (hashlib/ssl name, iana name or standin, ... other known aliases)
# hashes with official IANA-assigned names
# (as of 2012-03 - http://www.iana.org/assignments/hash-function-text-names)
("md2", "md2"),
("md5", "md5"),
("sha1", "sha-1"),
("sha224", "sha-224", "sha2-224"),
("sha256", "sha-256", "sha2-256"),
("sha384", "sha-384", "sha2-384"),
("sha512", "sha-512", "sha2-512"),
# hashlib/ssl-supported hashes without official IANA names,
# hopefully compatible stand-ins have been chosen.
("md4", "md4"),
("sha", "sha-0", "sha0"),
("ripemd", "ripemd"),
("ripemd160", "ripemd-160"),
]
# cache for norm_hash_name()
_nhn_cache = {}
def norm_hash_name(name, format="hashlib"):
"""Normalize hash function name
:arg name:
Original hash function name.
This name can be a Python :mod:`~hashlib` digest name,
a SCRAM mechanism name, IANA assigned hash name, etc.
Case is ignored, and underscores are converted to hyphens.
:param format:
Naming convention to normalize to.
Possible values are:
* ``"hashlib"`` (the default) - normalizes name to be compatible
with Python's :mod:`!hashlib`.
* ``"iana"`` - normalizes name to IANA-assigned hash function name.
for hashes which IANA hasn't assigned a name for, issues a warning,
and then uses a heuristic to give a "best guess".
:returns:
Hash name, returned as native :class:`!str`.
"""
# check cache
try:
idx = _nhn_formats[format]
except KeyError:
raise ValueError("unknown format: %r" % (format,))
try:
return _nhn_cache[name][idx]
except KeyError:
pass
orig = name
# normalize input
if not isinstance(name, str):
name = to_native_str(name, 'utf-8', 'hash name')
name = re.sub("[_ /]", "-", name.strip().lower())
if name.startswith("scram-"):
name = name[6:]
if name.endswith("-plus"):
name = name[:-5]
# look through standard names and known aliases
def check_table(name):
for row in _nhn_hash_names:
if name in row:
_nhn_cache[orig] = row
return row[idx]
result = check_table(name)
if result:
return result
# try to clean name up, and recheck table
m = re.match("^(?P<name>[a-z]+)-?(?P<rev>\d)?-?(?P<size>\d{3,4})?$", name)
if m:
name, rev, size = m.group("name", "rev", "size")
if rev:
name += rev
if size:
name += "-" + size
result = check_table(name)
if result:
return result
# else we've done what we can
warn("norm_hash_name(): unknown hash: %r" % (orig,), PasslibRuntimeWarning)
name2 = name.replace("-", "")
row = _nhn_cache[orig] = (name2, name)
return row[idx]
# TODO: get_hash() func which wraps norm_hash_name(), hashlib.<attr>, and hashlib.new
#=============================================================================
# general prf lookup
#=============================================================================
_BNULL = b('\x00')
_XY_DIGEST = b(',\x1cb\xe0H\xa5\x82M\xfb>\xd6\x98\xef\x8e\xf9oQ\x85\xa3i')
_trans_5C = join_byte_values((x ^ 0x5C) for x in irange(256))
_trans_36 = join_byte_values((x ^ 0x36) for x in irange(256))
def _get_hmac_prf(digest):
"helper to return HMAC prf for specific digest"
def tag_wrapper(prf):
prf.__name__ = "hmac_" + digest
prf.__doc__ = ("hmac_%s(key, msg) -> digest;"
" generated by passlib.utils.pbkdf2.get_prf()" %
digest)
if _EVP and digest == "sha1":
# use m2crypto function directly for sha1, since that's it's default digest
try:
result = _EVP.hmac(b('x'),b('y'))
except ValueError: # pragma: no cover
pass
else:
if result == _XY_DIGEST:
return _EVP.hmac, 20
# don't expect to ever get here, but will fall back to pure-python if we do.
warn("M2Crypto.EVP.HMAC() returned unexpected result " # pragma: no cover -- sanity check
"during Passlib self-test!", PasslibRuntimeWarning)
elif _EVP:
# use m2crypto if it's present and supports requested digest
try:
result = _EVP.hmac(b('x'), b('y'), digest)
except ValueError:
pass
else:
# it does. so use M2Crypto's hmac & digest code
hmac_const = _EVP.hmac
def prf(key, msg):
return hmac_const(key, msg, digest)
digest_size = len(result)
tag_wrapper(prf)
return prf, digest_size
# fall back to hashlib-based implementation
digest_const = getattr(hashlib, digest, None)
if not digest_const:
raise ValueError("unknown hash algorithm: %r" % (digest,))
tmp = digest_const()
block_size = tmp.block_size
assert block_size >= 16, "unacceptably low block size"
digest_size = tmp.digest_size
del tmp
def prf(key, msg):
# simplified version of stdlib's hmac module
if len(key) > block_size:
key = digest_const(key).digest()
key += _BNULL * (block_size - len(key))
tmp = digest_const(key.translate(_trans_36) + msg).digest()
return digest_const(key.translate(_trans_5C) + tmp).digest()
tag_wrapper(prf)
return prf, digest_size
# cache mapping prf name/func -> (func, digest_size)
_prf_cache = {}
def _clear_prf_cache():
"helper for unit tests"
_prf_cache.clear()
def get_prf(name):
"""lookup pseudo-random family (prf) by name.
:arg name:
this must be the name of a recognized prf.
currently this only recognizes names with the format
:samp:`hmac-{digest}`, where :samp:`{digest}`
is the name of a hash function such as
``md5``, ``sha256``, etc.
this can also be a callable with the signature
``prf(secret, message) -> digest``,
in which case it will be returned unchanged.
:raises ValueError: if the name is not known
:raises TypeError: if the name is not a callable or string
:returns:
a tuple of :samp:`({func}, {digest_size})`.
* :samp:`{func}` is a function implementing
the specified prf, and has the signature
``func(secret, message) -> digest``.
* :samp:`{digest_size}` is an integer indicating
the number of bytes the function returns.
usage example::
>>> from passlib.utils.pbkdf2 import get_prf
>>> hmac_sha256, dsize = get_prf("hmac-sha256")
>>> hmac_sha256
<function hmac_sha256 at 0x1e37c80>
>>> dsize
32
>>> digest = hmac_sha256('password', 'message')
this function will attempt to return the fastest implementation
it can find; if M2Crypto is present, and supports the specified prf,
:func:`M2Crypto.EVP.hmac` will be used behind the scenes.
"""
global _prf_cache
if name in _prf_cache:
return _prf_cache[name]
if isinstance(name, str):
if name.startswith("hmac-") or name.startswith("hmac_"):
retval = _get_hmac_prf(name[5:])
else:
raise ValueError("unknown prf algorithm: %r" % (name,))
elif callable(name):
# assume it's a callable, use it directly
digest_size = len(name(b('x'),b('y')))
retval = (name, digest_size)
else:
raise ExpectedTypeError(name, "str or callable", "prf name")
_prf_cache[name] = retval
return retval
#=============================================================================
# pbkdf1 support
#=============================================================================
def pbkdf1(secret, salt, rounds, keylen=None, hash="sha1"):
"""pkcs#5 password-based key derivation v1.5
:arg secret: passphrase to use to generate key
:arg salt: salt string to use when generating key
:param rounds: number of rounds to use to generate key
:arg keylen: number of bytes to generate (if ``None``, uses digest's native size)
:param hash:
hash function to use. must be name of a hash recognized by hashlib.
:returns:
raw bytes of generated key
.. note::
This algorithm has been deprecated, new code should use PBKDF2.
Among other limitations, ``keylen`` cannot be larger
than the digest size of the specified hash.
"""
# validate secret & salt
if not isinstance(secret, bytes):
raise ExpectedTypeError(secret, "bytes", "secret")
if not isinstance(salt, bytes):
raise ExpectedTypeError(salt, "bytes", "salt")
# validate rounds
if not isinstance(rounds, int_types):
raise ExpectedTypeError(rounds, "int", "rounds")
if rounds < 1:
raise ValueError("rounds must be at least 1")
# resolve hash
try:
hash_const = getattr(hashlib, hash)
except AttributeError:
# check for ssl hash
# NOTE: if hash unknown, new() will throw ValueError, which we'd just
# reraise anyways; so instead of checking, we just let it get
# thrown during first use, below
# TODO: use builtin md4 class if hashlib doesn't have it.
def hash_const(msg):
return hashlib.new(hash, msg)
# prime pbkdf1 loop, get block size
block = hash_const(secret + salt).digest()
# validate keylen
if keylen is None:
keylen = len(block)
elif not isinstance(keylen, int_types):
raise ExpectedTypeError(keylen, "int or None", "keylen")
elif keylen < 0:
raise ValueError("keylen must be at least 0")
elif keylen > len(block):
raise ValueError("keylength too large for digest: %r > %r" %
(keylen, len(block)))
# main pbkdf1 loop
for _ in irange(rounds-1):
block = hash_const(block).digest()
return block[:keylen]
#=============================================================================
# pbkdf2
#=============================================================================
MAX_BLOCKS = 0xffffffff # 2**32-1
MAX_HMAC_SHA1_KEYLEN = MAX_BLOCKS*20
# NOTE: the pbkdf2 spec does not specify a maximum number of rounds.
# however, many of the hashes in passlib are currently clamped
# at the 32-bit limit, just for sanity. once realistic pbkdf2 rounds
# start approaching 24 bits, this limit will be raised.
def pbkdf2(secret, salt, rounds, keylen=None, prf="hmac-sha1"):
"""pkcs#5 password-based key derivation v2.0
:arg secret: passphrase to use to generate key
:arg salt: salt string to use when generating key
:param rounds: number of rounds to use to generate key
:arg keylen:
number of bytes to generate.
if set to ``None``, will use digest size of selected prf.
:param prf:
psuedo-random family to use for key strengthening.
this can be any string or callable accepted by :func:`get_prf`.
this defaults to ``"hmac-sha1"`` (the only prf explicitly listed in
the PBKDF2 specification)
:returns:
raw bytes of generated key
"""
# validate secret & salt
if not isinstance(secret, bytes):
raise ExpectedTypeError(secret, "bytes", "secret")
if not isinstance(salt, bytes):
raise ExpectedTypeError(salt, "bytes", "salt")
# validate rounds
if not isinstance(rounds, int_types):
raise ExpectedTypeError(rounds, "int", "rounds")
if rounds < 1:
raise ValueError("rounds must be at least 1")
# validate keylen
if keylen is not None:
if not isinstance(keylen, int_types):
raise ExpectedTypeError(keylen, "int or None", "keylen")
elif keylen < 0:
raise ValueError("keylen must be at least 0")
# special case for m2crypto + hmac-sha1
if prf == "hmac-sha1" and _EVP:
if keylen is None:
keylen = 20
# NOTE: doing check here, because M2crypto won't take 'long' instances
# (which this is when running under 32bit)
if keylen > MAX_HMAC_SHA1_KEYLEN:
raise ValueError("key length too long for digest")
# NOTE: as of 2012-4-4, m2crypto has buffer overflow issue
# which may cause segfaults if keylen > 32 (EVP_MAX_KEY_LENGTH).
# therefore we're avoiding m2crypto for large keys until that's fixed.
# see https://bugzilla.osafoundation.org/show_bug.cgi?id=13052
if keylen < 32:
return _EVP.pbkdf2(secret, salt, rounds, keylen)
# resolve prf
prf_func, digest_size = get_prf(prf)
if keylen is None:
keylen = digest_size
# figure out how many blocks we'll need
block_count = (keylen+digest_size-1)//digest_size
if block_count >= MAX_BLOCKS:
raise ValueError("key length too long for digest")
# build up result from blocks
def gen():
for i in irange(block_count):
digest = prf_func(secret, salt + pack(">L", i+1))
accum = bytes_to_int(digest)
for _ in irange(rounds-1):
digest = prf_func(secret, digest)
accum ^= bytes_to_int(digest)
yield int_to_bytes(accum, digest_size)
return join_bytes(gen())[:keylen]
#=============================================================================
# eof
#=============================================================================