Source code for cobra_db.encrypt

import re
from datetime import datetime
from typing import Tuple

from Crypto.Hash import SHA512

from cobra_db.enums import PersonalNumberValidity


[docs]class PersonalNumberValidator: """Class to group all methods related to validating personal numbers. Checksum is calculated according to https://en.wikipedia.org/wiki/Personal_identity_number_(Sweden)#Checksum """
[docs] @staticmethod def get_checksum(personal_number: str) -> int: """Calculate the checksum for a personal number :param personal_number: 10 digit pnr. Without any other characters. """ identity = "212121212" multiply = [str(int(p) * int(i)) for p, i in zip(personal_number, identity)] sum_digits = str(sum(int(m) for m in "".join(multiply))) last_digit = sum_digits[-1] return 10 - int(last_digit)
[docs] @staticmethod def validate(personal_number: str) -> PersonalNumberValidity: """ Checks different aspects of validity described in the PnrValidity enum. It is important to store this information along with the hashed PersonalNumber to be able to debug matching problems later. This is because the hashed version of 19000101-1234, 190001011234 and 0001011234 are totally different. At least with this information we can try to modify one side or the other of the matching to fix it. """ validity = PersonalNumberValidity.NOT_VALID # check for characters outside 0-9 p = personal_number personal_number = re.sub(r"[^\da-zA-Z]+", "", personal_number) if p == personal_number: validity = validity | PersonalNumberValidity.CHARS if len(personal_number) == 10: pass elif len(personal_number) == 12: # long version of the personnummer validity = validity | PersonalNumberValidity.LENGTH # make it short since the other two digits are not used in the checksum personal_number = personal_number[2:] else: return validity # must be either 10 or 12 digit to continue try: # parse the date to and raise errors if something wrong datetime.strptime(personal_number[:6], "%y%m%d") validity = validity | PersonalNumberValidity.DATE except ValueError: pass # the date could not be parsed try: # checksum checksum = PersonalNumberValidator.get_checksum(personal_number) if int(personal_number[-1]) == checksum: validity = validity | PersonalNumberValidity.CHECKSUM except ValueError: pass # could not compute checksum except IndexError: pass return validity
[docs]class Hasher: """Hash method for the VAIB project, uses a sha512/256 hashing which is "a one way cryptographic process", i.e. irreversible. """ def __init__(self, secret_salt: str) -> None: """Create an instance of the Hasher that has a predefined salt :param secret_salt: a random string that makes the hash harder to break. It gets prepended to the string that gets hashed. """ # important because when getting the bytes anything can be used as salt. assert type(secret_salt) == str self.secret_salt = secret_salt
[docs] def hash(self, msg: str): """Hash a message :param msg: message that we want to encrypt, normally the personnummer or the StudyID. :return: the encrypted message as hexdigest (in characters from '0' to '9' and 'a' to 'f') """ assert type(msg) == str, f"value is not of type str, {type(msg)}" h = SHA512.new(truncate="256") bytes_str = bytes(f"{self.secret_salt}{msg}", "utf-8") h.update(bytes_str) return str(h.hexdigest())
[docs] def hash_personal_number( self, personal_number: str ) -> Tuple[str, PersonalNumberValidity]: """Hash the personnal number after removing all characters that are not 0-9, a-z, or A-Z. Obtain the validity of the personal number before the hashing. :param personal_number: personal number to be hashed :return: hashed string and validity of the pre-hashed pnr """ pnr = re.sub(r"[^\da-zA-Z]+", "", personal_number) return self.hash(pnr), PersonalNumberValidator.validate(pnr)