Source code for deriva.core.utils.hash_utils

import os
import hashlib
import base64
import binascii
import logging


[docs]def compute_hashes(obj, hashes=frozenset(['md5'])): """ Digests input data read from file-like object fd or passed directly as bytes-like object. Compute hashes for multiple algorithms. Default is MD5. Returns a tuple of a hex-encoded digest string and a base64-encoded value suitable for an HTTP header. """ if not (hasattr(obj, 'read') or isinstance(obj, bytes)): raise ValueError("Cannot compute hash for given input: a file-like object or bytes-like object is required") hashers = dict() for alg in hashes: try: hashers[alg] = hashlib.new(alg.lower()) except ValueError: logging.warning("Unable to validate file contents using unknown hash algorithm: %s", alg) while True: if hasattr(obj, 'read'): block = obj.read(1024 ** 2) else: block = obj obj = None if not block: break for i in hashers.values(): i.update(block) hashes = dict() for alg, h in hashers.items(): digest = h.hexdigest() base64digest = base64.b64encode(h.digest()) # base64.b64encode returns str on python 2.7 and bytes on 3.x, so deal with that and always return a str if not isinstance(base64digest, str) and isinstance(base64digest, bytes): base64digest = base64digest.decode('ascii') hashes[alg] = digest, base64digest return hashes
[docs]def compute_file_hashes(file_path, hashes=frozenset(['md5'])): """ Digests data read from file denoted by file_path. """ if not os.path.exists(file_path): raise FileNotFoundError(file_path) else: logging.debug("Computing [%s] hashes for file [%s]" % (','.join(hashes), file_path)) try: with open(file_path, 'rb') as fd: return compute_hashes(fd, hashes) except (IOError, OSError) as e: logging.warning("Error while calculating digest(s) for file %s: %s" % (file_path, str(e))) raise
[docs]def decodeBase64toHex(base64str): result = binascii.hexlify(base64.standard_b64decode(base64str)) if isinstance(result, bytes): result = result.decode('ascii') return result