"""Entity, attribute and mapping-source classes plus console logging helpers."""

import collections
import logging
import re
import sys
# NOTE(review): ``Error`` and ``Boolean`` are not referenced below any more;
# the imports are retained in case other code relies on them being importable
# from this module - confirm before removing.
from shutil import Error
from xmlrpc.client import Boolean

from jinja2 import TemplateNotFound, UndefinedError
from rich import print
from rich.panel import Panel
from rich.tree import Tree


def add_to_log_tree(tree: Tree, path: tuple, currentindex):
    """Append ``path[currentindex:]`` as a chain of nested children below *tree*.

    Each element is converted with ``str`` and added as the single child of the
    previously added node.

    :param tree: node to start from.
    :param path: sequence of path elements.
    :param currentindex: index of the first element of *path* to add.
    :return: the innermost node that was added, or *tree* itself when there is
        nothing left to add.
    """
    node = tree
    for index in range(currentindex, len(path)):
        node = node.add(str(path[index]))
    return node


def log(level: int, title: str, path: tuple, message: str, printout: bool = False):
    """Print *message* inside a titled panel, nested below the elements of *path*.

    :param level: accepted for interface compatibility; not used by the current
        implementation.
    :param title: panel title (rendered with a ``[red]`` markup prefix).
    :param path: elements shown as a nested tree; the first one is the root.
        An empty path yields a tree with an empty root label.
    :param message: text attached to the innermost path element.
    :param printout: accepted for interface compatibility; not used by the
        current implementation (the panel is always printed).
    """
    root_label = str(path[0]) if path else ""
    tree = Tree(root_label, highlight=True)
    innermost = add_to_log_tree(tree, path, 1)
    innermost.add(message)
    print(Panel(tree, title="[red]" + str(title), padding=1, title_align="left"))


class ErrorCollection:
    """Accumulates error records, each a dict with ``title``, ``path`` and ``message``."""

    def __init__(self):
        self._errors = []

    def add(self, title, path, message):
        """Append a single error record."""
        self._errors.append(dict(title=title, path=path, message=message))

    def append(self, errors):
        """Append all records of another :class:`ErrorCollection`."""
        self._errors += errors._errors

    @property
    def count(self) -> int:
        """Number of collected error records."""
        return len(self._errors)

    @property
    def errors(self) -> list:
        """The list of collected error records (the internal list, not a copy)."""
        return self._errors


class DBEntity:
    """Database object settings of an entity.

    The settings are the entity defaults overlaid with the optional
    entity-specific definition.
    """

    def __init__(self, entityname: str, entity, entitydefaults: dict, entitydbdefinition=None):
        """
        :param entityname: fallback for ``name`` when the merged definition has none.
        :param entity: owning entity; its ``model.basetemplates`` is used for rendering.
        :param entitydefaults: default settings; ``None`` is treated as empty.
        :param entitydbdefinition: optional overrides; win over the defaults.
        """
        self.entity = entity
        self._definition = (entitydefaults or {}) | (entitydbdefinition or {})
        self.name = self._definition.get('name', entityname)
        self.database = self._definition.get('database', '')
        self.schema = self._definition.get('schema', '')
        self.filegroup = self._definition.get('filegroup', '')
        self.properties = self._definition.get('properties', {})

    def get_qualifier(self, include_db: bool = True) -> str:
        """Return the qualifier rendered by the ``table_qualifier`` base template.

        :param include_db: passed to the template as ``includeDB``.
        """
        template = self.entity.model.basetemplates.get('table_qualifier')
        return template.render(dbentity=self, includeDB=include_db)


class DataVaultEntityAttribute(object):
    """A single attribute of an entity, built from its definition dict."""

    # TODO: consider switching the attribute definition from a list to a dict:
    #         attributes:
    #           - {name: cust_no, type: 'varchar(32)'}
    #       becomes
    #         attributes:
    #           cust_no: {type: 'varchar(32)'}
    #       VS Code replace pattern: search "- \{name: (.*)," replace with "$1: {"

    def __init__(self, entity, definition):
        """
        :param entity: owning entity; must expose ``type`` and ``model.config``.
        :param definition: attribute definition dict.
        """
        self._definition = definition
        self.entity = entity
        self.name = definition.get('name', '')
        # Old style: type="varchar(100)"; new style: type='varchar' plus the
        # separate length/precision/scale keys below.
        self._type = definition.get('type', '')
        self.length = definition.get('length', '')
        self.precision = definition.get('precision', '')
        self.scale = definition.get('scale', '')
        self.default = definition.get('default', '')
        # TODO: make self.datatype a property yielding e.g. varchar(100) and
        #       expose self.type as the native type.
        self.is_mandatory = definition.get('mandatory', False)
        self.logicalname = definition.get('logicalname', '')  # FIXME: add to schema
        self.description = definition.get('description', '')
        # The fallback role comes from the defaults of the owning entity's type.
        self.role = definition.get(
            'role',
            self.entity.model.config.entitydefaults[self.entity.type].get('attribute_role', 'base'),
        )
        self._ghostvalue = definition.get('ghost')
        self.properties = definition.get('props', {})
        self.order = definition.get('order')
        self.is_pii = definition.get('pii', False)

    @property
    def datatype(self) -> str:
        """The datatype exactly as given under ``type`` in the definition."""
        return self._type

    @property
    def ghostvalue(self) -> str:
        """The ghost value of the attribute.

        Returns the explicitly defined ``ghost`` value when it is truthy;
        otherwise the configured ghost-record value for the lower-cased native
        datatype, falling back to the ``other`` entry and finally to ``''``.
        """
        # NOTE(review): a falsy explicit ghost value (0, '') is treated as
        # "not set" - confirm this is intended.
        if self._ghostvalue:
            return self._ghostvalue
        ghostrecord = self.entity.model.config.datavault.ghostrecord
        return ghostrecord.get(self.native_datatype.lower(), ghostrecord.get('other', ''))

    @property
    def native_datatype(self) -> str:
        """Return the native datatype expression, e.g. ``nvarchar(100)`` -> ``nvarchar``.

        With a parenthesis the part before it is stripped and lower-cased;
        without one the datatype is returned unchanged (not lower-cased).
        """
        # IMPROVE: should really be stated explicitly in the attribute definition.
        datatype = self.datatype
        paren = datatype.find('(')
        if paren == -1:
            return datatype
        return datatype[:paren].strip().lower()

    @property
    def native_datatypelength(self) -> str:
        """Return the native datatype length, e.g. ``nvarchar(100)`` -> ``100``.

        Returns ``''`` when the datatype has no opening parenthesis.
        """
        # IMPROVE: should really be stated explicitly in the attribute definition.
        datatype = self.datatype
        paren = datatype.find('(')
        if paren == -1:
            return ''
        return datatype[paren + 1:datatype.find(')')].strip().lower()

    @property
    def column_definition(self) -> str:
        """Return the column definition rendered by the ``column_ddl`` base template."""
        return self.entity.model.basetemplates.get('column_ddl').render(attribute=self)

    def copy(self, newname: str = ''):
        """Return a new attribute built from the same entity and definition.

        The copy is always a plain :class:`DataVaultEntityAttribute` created
        from the original definition, so values assigned after construction
        (e.g. an overridden ``role``) are not carried over.

        :param newname: when non-empty, the name given to the copy.
        """
        duplicate = DataVaultEntityAttribute(self.entity, self._definition)
        if newname:
            duplicate.name = newname
        return duplicate

    def validate(self, spec):
        """Check the attribute's type against the datatype patterns of *spec*.

        :param spec: key into ``model.sys_specifications``; a falsy value skips
            validation.
        :return: an :class:`ErrorCollection`, empty when the type matches at
            least one pattern (or when validation was skipped).
        """
        errors = ErrorCollection()
        if not spec:
            return errors

        logging.debug('Validating attribute <%s>', self.name)

        datatypes = self.entity.model.sys_specifications[spec]['datatypes']
        is_valid = any(
            re.search(definition.get('pattern'), self._type, re.MULTILINE | re.IGNORECASE)
            for definition in datatypes.values()
        )

        if not is_valid:
            logging.debug('datatype <%s> of attribute <%s> not valid', self._type, self.name)
            errors.add("VALIDATION ERROR",
                       (self.entity.filename, "Attribute", "<" + self.name + ">"),
                       f'Datatype <{self._type}> not valid (not matching any pattern in {spec})')

        return errors


class DerivedAttribute(DataVaultEntityAttribute):
    """Marker subclass of :class:`DataVaultEntityAttribute`; adds no behaviour here."""


class GeneratorEntity:
    """Base class for everything the generator builds from a definition dict."""

    def __init__(self, model, filename: str, definition: dict = None):
        """
        :param model: owning model.
        :param filename: file the definition was read from.
        :param definition: definition dict; ``None`` is treated as empty.
        """
        if definition is None:
            definition = {}
        self.model = model
        self.filename = filename
        self._definition = definition
        self.id = definition.get('name')
        self.name = definition.get('name')
        self.type = definition.get('type')
        self.subtype = definition.get('subtype', 'base')
        self.description = definition.get('description', '')
        self.generate = definition.get('generate', 1)
        self.extra = definition.get('extra', {})
        self._sql_pre_hook = definition.get('sql_pre_hook', '')
        self._sql_post_hook = definition.get('sql_post_hook', '')

    @property
    def type_display_name(self) -> str:
        """The ``displayname`` registered in the model for this entity's type."""
        return self.model.get_types().get(self.type).get('displayname')

    @property
    def subtype_display_name(self) -> str:
        """The ``displayname`` registered in the model for this entity's subtype."""
        return self.model.get_subtypes().get(self.subtype).get('displayname')

    def render_template(self, templatefilename: str):
        """Render the entity with the given template and return the result as a string.

        The template receives ``entity``, ``templatename`` and an empty
        ``templateversion``. A missing template or an undefined template
        variable is reported (console and log) and terminates the process with
        exit code 2.
        """
        try:
            template = self.model.templateEnvironment.get_template(templatefilename)
            output = template.render(
                entity=self,
                templatename=templatefilename,
                templateversion=''
            )
        except TemplateNotFound:
            print("")
            print(Panel(f"[red]Error while rendering entity-templates[/red]: Template {templatefilename} not found.",
                        title="[red]RENDER ERROR", padding=1, title_align="left"))
            logging.error(f"Template {templatefilename} not found.")
            sys.exit(2)
        except UndefinedError as e:
            print("")
            logging.error(f"Error while rendering entity {self.name} :")
            logging.error(e)
            print(f"Error while rendering entity {self.name} :", e)
            sys.exit(2)

        return output

    @property
    def sql_pre_hook(self) -> str:
        """The ``sql_pre_hook`` definition rendered as a template with ``this=self``."""
        return self.model.templateEnvironment.from_string(self._sql_pre_hook).render(this=self)

    @property
    def sql_post_hook(self) -> str:
        """The ``sql_post_hook`` definition rendered as a template with ``this=self``."""
        return self.model.templateEnvironment.from_string(self._sql_post_hook).render(this=self)

    def get_component_entities(self):
        """Return the component entities; the base implementation has none."""
        return []


class Layer(GeneratorEntity):
    """A layer whose settings are read from ``model.config.layer[<layer id>]``."""

    def __init__(self, model, layerid, definition):
        GeneratorEntity.__init__(self, model, '', definition)
        # The layer id replaces the id derived from the definition's name.
        self.id = layerid

    def _layer_config(self):
        """Return this layer's entry of ``model.config.layer``."""
        return self.model.config.layer.get(self.id)

    def _default_database_object(self):
        """Return the ``defaultdatabaseobject`` entry of this layer's config."""
        return self._layer_config().get('defaultdatabaseobject')

    @property
    def database(self) -> str:
        """``database`` of the layer's default database object."""
        return self._default_database_object().get('database')

    @property
    def schema(self) -> str:
        """``schema`` of the layer's default database object."""
        return self._default_database_object().get('schema')

    @property
    def filegroup(self) -> str:
        """``filegroup`` of the layer's default database object."""
        return self._default_database_object().get('filegroup')

    @property
    def sys_specification(self) -> str:
        """``sys_specification`` of the layer config, ``''`` when absent."""
        return self._layer_config().get('sys_specification', '')

    @property
    def connection_name(self) -> str:
        """``connectionname`` of the layer config, ``''`` when absent."""
        return self._layer_config().get('connectionname', '')

    def get_entities(self) -> dict:
        """Return a dict (key -> entity) of the model entities whose layer is this one."""
        return {key: entity for key, entity in self.model.entities.items() if entity.layer == self}

    def get_entity_count(self):
        """Return the number of model entities whose layer is this one."""
        return sum(1 for entity in self.model.entities.values() if entity.layer == self)


class DataVaultEntity(GeneratorEntity):
    """An entity with attributes that belongs to a layer."""

    def __init__(self, model, filename: str, definition: dict = None):
        """Build the entity and its attributes.

        Common attributes configured for the entity type are added first
        (unless listed under ``exclude_commonattributes``), followed by the
        attributes of the definition; a later attribute with the same name
        replaces an earlier one.
        """
        GeneratorEntity.__init__(self, model, filename, definition)
        if definition is None:
            definition = {}

        self._layername = definition.get('layer', self.model.config.entitydefaults[self.type]['layer'])
        self.attributes = collections.OrderedDict()

        excluded = definition.get('exclude_commonattributes', [])
        type_defaults = self.model.config.entitydefaults.get(self.type, {})
        for attribute_rolename in type_defaults.get('attributes', []):
            if attribute_rolename in excluded:
                continue
            attr = DataVaultEntityAttribute(self, self.model.config.commonattributes.get(attribute_rolename))
            # A common attribute's role is the key it is configured under.
            attr.role = attribute_rolename
            self.add_attribute(attr)

        for attrdef in definition.get('attributes', []):
            self.add_attribute(DataVaultEntityAttribute(self, attrdef))

    @property
    def layer(self) -> Layer:
        """Returns the entity layer."""
        return self.model.get_layer(self._layername)

    @property
    def dbentity(self) -> DBEntity:
        """A new :class:`DBEntity` from the layer defaults and the ``dbentity`` definition."""
        layer_defaults = self.model.config.layer.get(self._layername, {}).get('defaultdatabaseobject', {})
        return DBEntity(self.name, self, layer_defaults, self._definition.get('dbentity'))

    def add_attribute(self, attribute: DataVaultEntityAttribute):
        """Add an attribute to the entity, keyed by its name."""
        self.attributes[attribute.name] = attribute

    def get_attribute(self, name: str) -> DataVaultEntityAttribute:
        """Get an attribute by name, falling back to :meth:`get_foreign_attribute`."""
        # The foreign lookup is only performed when no own attribute matches.
        if name in self.attributes:
            return self.attributes[name]
        return self.get_foreign_attribute(name)

    def get_attributes(self, roles: list = 'all', exclude: list = ()) -> list[DataVaultEntityAttribute]:
        """Return the attributes for one or more given roles.

        :param roles: a role name or a collection of role names. ``'all'``
            selects every attribute; ``'all'`` and ``'fk'`` additionally
            include the result of :meth:`get_foreign_attributes`.
        :param exclude: a role name or a collection of role names to leave out.
            It is not applied when ``'fk'`` is requested (without ``'all'``).
        """
        # A bare string would otherwise be matched by substring ("in" on str).
        if isinstance(roles, str):
            roles = [roles]
        if isinstance(exclude, str):
            exclude = [exclude]

        own = self.attributes.values()
        if 'all' in roles:
            return [a for a in own if a.role not in exclude] + self.get_foreign_attributes()
        if 'fk' in roles:
            # IMPROVE: could be simplified if attribute references were already resolved.
            return [a for a in own if a.role in roles] + self.get_foreign_attributes()
        return [a for a in own if a.role in roles and a.role not in exclude]

    def get_foreign_attribute(self, name: str) -> DataVaultEntityAttribute:
        """Return an attribute that lives on a connected entity.

        Extension hook for :meth:`get_attribute`, meant to be implemented by
        the respective entity classes. The base implementation returns ``None``.
        """
        return None

    def get_foreign_attributes(self) -> list:
        """Return the attributes that live on connected entities.

        Extension hook for :meth:`get_attributes`, meant to be implemented by
        the respective entity classes. The base implementation returns ``[]``.
        """
        return []

    def get_role_attribute(self, role: str) -> DataVaultEntityAttribute:
        """Return the first attribute with the given role, or ``None`` if there is none."""
        try:
            return self.get_attributes(roles=role)[0]
        except IndexError:
            return None

    def contains_pii_attributes(self) -> bool:
        """Return ``True`` if any own attribute is flagged as PII."""
        return any(a.is_pii for a in self.attributes.values())

    def get_source_entities(self, active_only: bool = True):
        """Return a dict (source name -> entity) of sources mapped onto this entity.

        :param active_only: when true, target mappings of type ``mappingonly``
            are skipped.
        """
        # Only entities of type = delivery should show up here.
        entities = dict()
        for sourcename, mapping in self.model.get_mappings().items():
            tm = mapping.get_targetmapping_by_target(self.name)
            # FIXME: possibly include role hubs here as well.
            if tm and (not active_only or tm.type != 'mappingonly'):
                entities[sourcename] = self.model.get_entity(sourcename)
        return entities

    def validate(self) -> ErrorCollection:
        """Validate the entity; the base implementation reports no errors."""
        return ErrorCollection()

    def get_component_attributes(self, attributename: str) -> list:
        """Return the source attributes that are mapped onto *attributename*.

        :return: list of dicts with the keys ``attribute`` (the target
            attribute), ``sourceentity`` and ``sourceattribute``.
        """
        components = []
        for sourcename, mapping in self.model.get_mappings().items():
            tm = mapping.get_targetmapping_by_target(self.name)
            if not tm:
                continue
            for am in tm.get_attribute_mappings():
                if am.targetattribute_name != attributename:
                    continue
                srcentity = self.model.get_entity(sourcename)
                for ea in am.source.get_expression_attributes().values():
                    components.append(dict(attribute=self.get_attribute(attributename),
                                           sourceentity=srcentity,
                                           sourceattribute=ea))
        return components


class DataVaultEntityAttributeExpression:
    """An expression with placeholders that are resolved from components."""

    # FIXME: is this still needed in this form? cf. Mapping > AttributeMappingExpression
    #   VALUE_IND:
    #     expression:
    #       "max(case when {1} = 'IND' then {2} end)"
    #     components:
    #       1: [thp, shortname]
    #       2: [tls, value]

    def __init__(self, rule, expression: str = '', components: dict = None, resulttype: str = ''):
        """
        :param rule: owning rule; its entity's model provides the templates.
        :param expression: e.g. ``"max(case when {placeholder1} = 'IND' then {placeholder2} end)"``
        :param components: dict of lists, e.g.
            ``{placeholder1: [tablealias, column], placeholder2: [tablealias, column]}``;
            ``None`` means no components.
        :param resulttype: datatype of the expression result.
        """
        self._expression = expression
        # A fresh dict per instance instead of a shared mutable default.
        self.components = {} if components is None else components
        self._resulttype = resulttype
        self._rule = rule

    @property
    def expression(self) -> str:
        """The raw expression with unresolved placeholders."""
        return self._expression

    def get_components(self) -> dict:
        """Return the placeholder -> component dict."""
        return self.components

    @property
    def datatype(self) -> str:
        """The result type given at construction."""
        return self._resulttype

    def get_parsed_expression(self) -> str:
        """Return the expression with every ``{placeholder}`` replaced.

        Each replacement is the component rendered by the
        ``attribute_expression`` base template.
        """
        parsed_result = self.expression
        template = self._rule.entity.model.basetemplates.get('attribute_expression')
        for placeholder, component in self.get_components().items():
            parsed_result = parsed_result.replace('{' + str(placeholder) + '}',
                                                  template.render(component=component))
        return parsed_result


class DataVaultEntityRule:
    """A named rule of an entity, wrapping a :class:`DataVaultEntityAttributeExpression`."""

    def __init__(self, entity, name: str, definition: dict = None):
        """
        :param entity: owning entity.
        :param name: rule name.
        :param definition: dict with the keys ``expression``, ``attributes``
            and ``resulttype``; ``None`` is treated as empty.
        """
        if definition is None:
            definition = {}
        self.entity = entity
        self.name = name
        self.expression = DataVaultEntityAttributeExpression(self,
                                                             definition.get('expression'),
                                                             definition.get('attributes'),
                                                             definition.get('resulttype'))


class MappingSource:
    """View of an entity as the source side of the model's mappings."""

    def __init__(self, model, entity):
        self.model = model
        self.entity = entity
        self.name = entity.name

    def get_target_entity_names(self, active_only: bool = False) -> list:
        """Return the target entity names of this source's mapping.

        :param active_only: when true, target mappings of type ``mappingonly``
            are left out.
        """
        if self.name not in self.model.mappings:
            return []
        targetmappings = self.model.mappings.get(self.name).get_targetmappings().values()
        return [m.targetentityname
                for m in targetmappings
                if not active_only or m.type != 'mappingonly']

    def get_target_entities(self) -> list:
        """Return the directly mapped entities, ``[]`` when this source has no mapping."""
        if self.name in self.model.mappings:
            return self.model.mappings.get(self.name).get_target_entities()
        return []

    def get_target_entity_hash_components(self, target) -> list:
        """Get the components for a target entity's hash key.

        Since the naming differs for each source, the attribute names of the
        source are used.

        :return: list of dicts with the keys ``sourceexpression`` and
            ``targetattribute``, one per attribute mapping onto *target*.
        """
        # stg -> single entity
        attributemappings = self.get_attribute_mappings_by_target(target.name)
        # FIXME: the bkcc attribute should always come first.
        return [{'sourceexpression': am.source, 'targetattribute': am.target}
                for am in attributemappings]

    def get_target_entities_hash_components(self) -> dict:
        """Return the hash keys of all directly mapped targets with their components.

        :return: dict keyed by hash attribute name; each value holds
            ``hashattribute`` and ``components`` and, except for parent keys
            added on behalf of a satellite, ``targetentity``.
        """
        hash_keys = {}
        for target in self.get_target_entities():  # directly mapped targets
            if target.type in ["hub", "reference"]:
                # TODO: reference is a special case here: its key is not hashed,
                #       which is exactly what makes it "vulnerable" to renaming
                #       between source and target.
                hash_keys[target.key_attribute.name] = dict(
                    hashattribute=target.key_attribute,
                    components=self.get_target_entity_hash_components(target),
                    targetentity=target)

            elif target.type in ["link"]:
                # For the link: linked-entity attributes from the mapping,
                # followed by the link's own attributes from the mapping.
                linkhashcomponents = []
                for le in target.get_linked_entities():
                    linkhashcomponents.extend(self.get_target_entity_hash_components(le))
                linkhashcomponents.extend(self.get_target_entity_hash_components(target))
                hash_keys[target.key_attribute.name] = dict(
                    hashattribute=target.key_attribute,
                    components=linkhashcomponents,
                    targetentity=target)

            elif target.type in ["satellite"]:
                hash_keys[target.hashdiff_fk_attribute.name] = dict(
                    hashattribute=target.hashdiff_fk_attribute,
                    components=self.get_target_entity_hash_components(target),
                    targetentity=target)
                # If the referenced hub or link key is not in the dict yet,
                # add it without components.
                parent_key = target.get_parent_key_attribute()
                if parent_key.name not in hash_keys:
                    hash_keys[parent_key.name] = {"hashattribute": parent_key,
                                                  "components": []}

        return hash_keys

    def get_target_entities_hash_attributes(self) -> dict:
        """Return the hash attributes of all directly mapped targets, keyed by name."""
        attributes = dict()
        for target in self.get_target_entities():
            if target.type in ["hub", "link"]:
                attributes[target.key_attribute.name] = target.key_attribute
                # BUG (disabled): also adding the key attributes of a link's
                # linked entities here makes the order deviate from the mapping.
            elif target.type in ["satellite"]:
                attributes[target.hashdiff_fk_attribute.name] = target.hashdiff_fk_attribute
                # If the referenced hub or link key is not in the dict yet, add it.
                parent_key = target.get_parent_key_attribute()
                if parent_key.name not in attributes:
                    attributes[parent_key.name] = parent_key
        return attributes

    def get_mappings(self) -> dict:
        """Return what the model's ``get_mapping`` yields for this source's name."""
        # NOTE(review): the result is used as an object with
        # get_attribute_mappings_by_target(), so the ``dict`` annotation looks
        # inaccurate - confirm against the model.
        return self.model.get_mapping(self.name)

    def get_attribute_mappings_by_target(self, target_entity_name: str) -> dict:
        """Return the source-to-target attribute mappings for the given target entity."""
        return self.get_mappings().get_attribute_mappings_by_target(target_entity_name)


class DynamicProperties(object):
    """Object with arbitrary attributes; unset attributes read as ``None``."""

    @classmethod
    def from_kwargs(cls, **kwargs):
        """Create an instance with one attribute per keyword argument."""
        obj = cls()
        for field, value in kwargs.items():
            setattr(obj, field, value)
        return obj

    def __getattr__(self, attr):
        """Return ``None`` for any attribute that has not been set.

        Dunder names raise ``AttributeError`` instead, so that protocol probing
        via ``hasattr``/``getattr`` (copying, pickling, ``__html__`` checks)
        does not mistake the instance for implementing every special method.
        """
        if attr.startswith('__') and attr.endswith('__'):
            raise AttributeError(attr)
        return None

    # TODO: new feature: option to export an object instance to yaml