This repository has been archived on 2026-03-20. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
datavaultgenerator-1.1.5/DataVaultGenerator/Components.py

600 lines
23 KiB
Python

import logging
import re
from shutil import Error
import sys
import collections
from xmlrpc.client import Boolean
from rich.panel import Panel
from rich.tree import Tree
from rich import print
from jinja2 import TemplateNotFound, UndefinedError
def add_to_log_tree(tree: Tree, path: tuple, currentindex):
    """Hang the path elements from ``currentindex`` onwards below ``tree`` as a nested chain.

    Each element becomes the single child of the previously added element
    (labelled with ``str(element)``).

    :param tree: node to start attaching to
    :param path: sequence of path elements
    :param currentindex: index of the first element of ``path`` to attach
    :return: the deepest node that was added, or ``tree`` itself when
        ``currentindex`` is already past the end of ``path``
    """
    node = tree
    for position in range(currentindex, len(path)):
        node = node.add(str(path[position]))
    return node
def log(level: int, title: str, path: tuple, message: str, printout: bool = False):
    """Print a titled panel showing ``path`` as a nested tree with ``message`` as its leaf.

    :param level: accepted for interface compatibility; not used by this implementation
    :param title: panel title (rendered with red markup)
    :param path: path elements; the first one becomes the tree root
    :param message: text attached below the last path element
    :param printout: accepted for interface compatibility; not used by this implementation
    """
    root = Tree(str(path[0]), highlight=True)
    # Remaining path elements (index 1 onwards) are chained below the root
    leaf = add_to_log_tree(root, path, 1)
    leaf.add(message)
    panel = Panel(root, title="[red]" + str(title), padding=1, title_align="left")
    print(panel)
#def log(level: int, title: str, path: tuple, message: str, printout: bool = False):
# tree=Tree(str(path[0]))
# print(Panel(tree, title="[red]"+str(title), expand=False,padding=1 ))
# line = ""
# line = "----------------------------- " + title + " -----------------------------"
# logging.log(level, line)
# if printout:
# print(line)
# lvl = 0
# for i in path:
# if lvl == 0:
# line = " " + str(i)
# logging.log(level, line)
# if printout:
# print(line)
# else:
# line = (" " * lvl) + " |-- " + str(i)
# logging.log(level, line)
# if printout:
# print(line)
# lvl = lvl + 1
#
# line = (" " * lvl) + " |--> " + message
# logging.log(level, line )
#
# if printout:
# print(line)
# print("------------------------------------------------------------" + "-" * len(title))
class ErrorCollection:
    """Accumulates errors as plain dicts with the keys ``title``, ``path`` and ``message``."""

    def __init__(self):
        self._errors = []

    def add(self, title, path, message):
        """Record a single error entry."""
        entry = {'title': title, 'path': path, 'message': message}
        self._errors.append(entry)

    def append(self, errors):
        """Take over all entries of another ``ErrorCollection``."""
        self._errors.extend(errors._errors)

    @property
    def count(self) -> int:
        """Number of entries collected so far."""
        return len(self._errors)

    @property
    def errors(self) -> list:
        """The internal list of error dicts (the list itself, not a copy)."""
        return self._errors
class DBEntity:
    """Database-object settings of an entity (name, database, schema, filegroup, properties).

    The settings are the merge of ``entitydefaults`` and ``entitydbdefinition``;
    keys from ``entitydbdefinition`` take precedence.
    """

    def __init__(self, entityname: str, entity, entitydefaults: dict, entitydbdefinition=None):
        """
        :param entityname: used as the object name when the merged settings have no ``name``
        :param entity: owning entity; only needed for rendering the qualifier
        :param entitydefaults: default settings (``None`` is treated as empty)
        :param entitydbdefinition: entity-specific overrides (``None`` is treated as empty)
        """
        self.entity = entity
        # Tolerate a missing defaults dict the same way a missing definition is tolerated
        self._definition = (entitydefaults or {}) | (entitydbdefinition or {})
        self.name = self._definition.get('name', entityname)
        self.database = self._definition.get('database', '')
        self.schema = self._definition.get('schema', '')
        self.filegroup = self._definition.get('filegroup', '')
        self.properties = self._definition.get('properties', {})

    def get_qualifier(self, include_db: bool = True) -> str:
        """Return the qualifier rendered by the model's ``table_qualifier`` base template.

        :param include_db: handed to the template as ``includeDB``
        """
        template = self.entity.model.basetemplates.get('table_qualifier')
        return template.render(dbentity=self, includeDB=include_db)
class DataVaultEntityAttribute(object):
    """One attribute (column) of an entity, populated from its definition dict.

    Keys missing from the definition fall back to defaults; the default role
    is read from the model's ``entitydefaults`` for the owning entity's type.
    """
    # TODO: possibly change the attribute definition from a list to a dict:
    #   attributes:
    #     - {name: cust_no, type: 'varchar(32)'}
    #   attributes:
    #     cust_no: {type: 'varchar(32)'}
    #
    # Replace pattern for VS Code: search: - \{name: (.*),   replace with: $1: {
    # __slots__ = ('_definition'
    #              ,'entity'
    #              ,'name'
    #              ,'datatype'
    #              ,'is_mandatory'
    #              ,'logicalname'
    #              ,'description'
    #              ,'role'
    #              ,'_ghostvalue' )

    def __init__(self, entity, definition):
        """
        :param entity: owning entity; must expose ``type`` and ``model.config.entitydefaults``
        :param definition: dict describing the attribute
        """
        # self.id = uuid.uuid4().hex
        self._definition = definition
        self.entity = entity
        self.name = definition.get('name', '')
        # self.datatype = definition.get('type', '')  # Old: type="varchar(100)"
        self._type = definition.get('type', '')  # New: type='varchar'
        self.length = definition.get('length', '')
        self.precision = definition.get('precision', '')
        self.scale = definition.get('scale', '')
        self.default = definition.get('default', '')
        # TODO: self.datatype as property => varchar(100)
        #       self.type as native type
        self.is_mandatory = definition.get('mandatory', False)
        self.logicalname = definition.get('logicalname', '')  # FIXME: add to the schema
        self.description = definition.get('description', '')
        self.role = definition.get(
            'role',
            self.entity.model.config.entitydefaults[self.entity.type].get('attribute_role', 'base'))
        self._ghostvalue = definition.get('ghost')
        self.properties = definition.get('props', {})
        self.order = definition.get('order')
        self.is_pii = definition.get('pii', False)

    @property
    def datatype(self) -> str:  # => Full datatype
        """The ``type`` value exactly as given in the definition."""
        return self._type

    @property
    def ghostvalue(self) -> str:
        """Ghost-record value of the attribute.

        An explicitly defined ``ghost`` value wins, including falsy ones such
        as ``0``. Only when none is defined, the value configured for the
        native datatype is used, falling back to the ``other`` entry and
        finally to ``''``.
        """
        if self._ghostvalue is not None:
            return self._ghostvalue
        ghostrecord = self.entity.model.config.datavault.ghostrecord
        return ghostrecord.get(self.native_datatype.lower(), ghostrecord.get('other', ''))

    @property
    def native_datatype(self) -> str:
        """Returns the native datatype expression. E.g. nvarchar

        With a parenthesised part the text before ``(`` is returned stripped
        and lower-cased; without one the datatype is returned unchanged.
        """
        base, paren, _ = self.datatype.partition('(')
        return base.strip().lower() if paren else self.datatype
        # IMPROVE: should really be stated explicitly in the attribute definition.

    @property
    def native_datatypelength(self) -> str:
        """Returns the native datatype length. E.g. nvarchar(100) -> 100

        Returns ``''`` when the datatype has no parenthesised part. A missing
        closing parenthesis yields everything after ``(``.
        """
        _, paren, remainder = self.datatype.partition('(')
        if not paren:
            return ''
        return remainder.partition(')')[0].strip().lower()
        # IMPROVE: should really be stated explicitly in the attribute definition.

    @property
    def column_definition(self) -> str:
        """Returns the column definition rendered by the model's ``column_ddl`` base template."""
        return self.entity.model.basetemplates.get('column_ddl').render(attribute=self)

    def copy(self, newname: str = ''):
        """Create a new attribute from the same entity and definition dict.

        The definition dict is shared, not copied, and values assigned after
        construction are not carried over.

        :param newname: when non-empty, the name given to the copy
        """
        duplicate = DataVaultEntityAttribute(self.entity, self._definition)
        if newname:
            duplicate.name = newname
        return duplicate

    def validate(self, spec):
        """Check the attribute's type against the datatype patterns of a specification.

        :param spec: key into ``model.sys_specifications``; a falsy value skips validation
        :return: ``ErrorCollection`` holding one entry when no pattern matches
        """
        errors = ErrorCollection()

        if not spec:
            return errors

        logging.debug('Validating attribute <%s>', self.name)

        datatypes = self.entity.model.sys_specifications[spec]['datatypes']
        flags = re.MULTILINE | re.IGNORECASE
        # One matching pattern is enough for the type to be considered valid
        is_valid = any(
            re.search(definition.get('pattern'), self._type, flags)
            for definition in datatypes.values()
        )

        if not is_valid:
            logging.debug('datatype <%s> of attribute <%s> not valid', self._type, self.name)
            errors.add("VALIDATION ERROR",
                       (self.entity.filename, "Attribute", "<" + self.name + ">"),
                       f'Datatype <{self._type}> not valid (not matching any pattern in {spec})')

        return errors
class DerivedAttribute(DataVaultEntityAttribute):
    """Subclass of ``DataVaultEntityAttribute`` that adds no behaviour of its own."""
class GeneratorEntity:
    """Base class of all model entities; holds the common definition values and renders templates."""

    def __init__(self, model, filename: str, definition: dict = None):
        """
        :param model: owning model; used for template and type lookups
        :param filename: file the definition was read from
        :param definition: entity definition; ``None`` is treated as an empty definition
        """
        # logging.info('Creating Entity %s',definition['name'])
        # The signature offers None as default, so do not crash on the .get calls below
        definition = {} if definition is None else definition
        self.model = model
        self.filename = filename
        self._definition = definition
        self.id = definition.get('name')
        self.name = definition.get('name')
        self.type = definition.get('type')
        self.subtype = definition.get('subtype', 'base')
        self.description = definition.get('description', '')
        self.generate = definition.get('generate', 1)
        self.extra = definition.get('extra', {})
        self._sql_pre_hook = definition.get('sql_pre_hook', '')
        self._sql_post_hook = definition.get('sql_post_hook', '')

    @property
    def type_display_name(self) -> str:
        """``displayname`` of this entity's type as returned by ``model.get_types()``."""
        return self.model.get_types().get(self.type).get('displayname')

    @property
    def subtype_display_name(self) -> str:
        """``displayname`` of this entity's subtype as returned by ``model.get_subtypes()``."""
        return self.model.get_subtypes().get(self.subtype).get('displayname')

    def render_template(self, templatefilename: str):
        """Render the entity with the given template and return the result as a string.

        The template receives ``entity``, ``templatename`` and an empty
        ``templateversion``. A missing template or an undefined template
        variable is reported and terminates the process with exit code 2.
        """
        try:
            template = self.model.templateEnvironment.get_template(templatefilename)
            # print( self.model.templateEnvironment.loader.get_source( self.model.templateEnvironment, templatefilename))
            # checksum = hashlib.md5(str(template).encode()).hexdigest().upper()
            output = template.render(
                entity=self,
                templatename=templatefilename,
                templateversion='',
            )
        except TemplateNotFound:
            print("")
            print(Panel(f"[red]Error while rendering entity-templates[/red]: Template {templatefilename} not found.", title="[red]RENDER ERROR", padding=1,title_align="left" ))
            logging.error("Template %s not found.", templatefilename)
            # print(f"Template {templatefilename} not found.")
            sys.exit(2)
        except UndefinedError as e:
            print("")
            logging.error("Error while rendering entity %s :", self.name)
            logging.error(e)
            print(f"Error while rendering entity {self.name} :", e)
            sys.exit(2)

        return output

    @property
    def sql_pre_hook(self) -> str:
        """The ``sql_pre_hook`` definition rendered as a template with ``this`` bound to the entity."""
        return self.model.templateEnvironment.from_string(self._sql_pre_hook).render(this=self)

    @property
    def sql_post_hook(self) -> str:
        """The ``sql_post_hook`` definition rendered as a template with ``this`` bound to the entity."""
        return self.model.templateEnvironment.from_string(self._sql_post_hook).render(this=self)

    def get_component_entities(self):
        """Return the component entities; the base implementation has none."""
        return []
class Layer(GeneratorEntity):
    """A layer of the model; its settings are looked up in ``model.config.layer`` by layer id."""

    def __init__(self, model, layerid, definition):
        """
        :param model: owning model
        :param layerid: key of the layer in ``model.config.layer``
        :param definition: layer definition passed on to ``GeneratorEntity``
        """
        GeneratorEntity.__init__(self, model, '', definition)
        # The base class derives the id from the definition's name; a layer is identified by its key
        self.id = layerid

    @property
    def _layer_config(self):
        """Configuration entry of this layer (``None`` when the id is not configured)."""
        return self.model.config.layer.get(self.id)

    @property
    def _default_dbobject(self):
        """The layer's ``defaultdatabaseobject`` settings."""
        return self._layer_config.get('defaultdatabaseobject')

    @property
    def database(self) -> str:
        """Default database of the layer."""
        return self._default_dbobject.get('database')

    @property
    def schema(self) -> str:
        """Default schema of the layer."""
        return self._default_dbobject.get('schema')

    @property
    def filegroup(self) -> str:
        """Default filegroup of the layer."""
        return self._default_dbobject.get('filegroup')

    @property
    def sys_specification(self) -> str:
        """Configured ``sys_specification`` of the layer, ``''`` when not set."""
        return self._layer_config.get('sys_specification', '')

    @property
    def connection_name(self) -> str:
        """Configured ``connectionname`` of the layer, ``''`` when not set."""
        return self._layer_config.get('connectionname', '')

    def get_entities(self) -> dict:
        """Return the model entities belonging to this layer, keyed like ``model.entities``."""
        return {key: entity for key, entity in self.model.entities.items() if entity.layer == self}

    def get_entity_count(self) -> int:
        """Return the number of model entities belonging to this layer."""
        return sum(1 for entity in self.model.entities.values() if entity.layer == self)
class DataVaultEntity(GeneratorEntity):
    """Entity that belongs to a layer and carries an ordered set of attributes.

    Attributes are built from the common attributes configured for the
    entity type (minus ``exclude_commonattributes``) followed by the
    attributes listed in the definition.
    """

    def __init__(self, model, filename: str, definition: dict = None):
        GeneratorEntity.__init__(self, model, filename, definition)
        # logging.info('Creating Entity %s',definition['name'])
        self._layername = definition.get('layer', self.model.config.entitydefaults[self.type]['layer'])
        self.attributes = collections.OrderedDict()

        excluded_common = definition.get('exclude_commonattributes', [])
        common_roles = self.model.config.entitydefaults.get(self.type, {}).get('attributes', [])
        for attribute_rolename in common_roles:
            if attribute_rolename in excluded_common:
                continue
            attr = DataVaultEntityAttribute(self, self.model.config.commonattributes.get(attribute_rolename))
            # Common attributes carry the name of their configuration entry as role
            attr.role = attribute_rolename
            self.add_attribute(attr)

        for attrdef in definition.get('attributes', []):
            self.add_attribute(DataVaultEntityAttribute(self, attrdef))

    @property
    def layer(self) -> Layer:
        """Returns the entity layer."""
        return self.model.get_layer(self._layername)

    @property
    def dbentity(self) -> DBEntity:
        """Build the ``DBEntity`` from the layer's default database object and the ``dbentity`` definition."""
        layer_defaults = self.model.config.layer.get(self._layername, {}).get('defaultdatabaseobject', {})
        return DBEntity(self.name, self, layer_defaults, self._definition.get('dbentity'))

    def add_attribute(self, attribute: DataVaultEntityAttribute):
        """Add an attribute to the entity; an existing attribute of the same name is replaced."""
        self.attributes[attribute.name] = attribute

    def get_attribute(self, name: str) -> DataVaultEntityAttribute:
        """Get an attribute by name, falling back to ``get_foreign_attribute``."""
        # Only consult the foreign lookup when there is no local attribute of that name
        if name in self.attributes:
            return self.attributes[name]
        return self.get_foreign_attribute(name)

    def get_attributes(self, roles: list = 'all', exclude: list = ()) -> list[DataVaultEntityAttribute]:
        """Return the attributes for one or more roles; certain roles can be excluded.

        :param roles: a role name or a list of role names; ``'all'`` selects every role
        :param exclude: a role name or a list of role names to leave out
        :return: matching own attributes; for ``'all'`` and ``'fk'`` the foreign
            attributes are appended
        """
        # A single role given as string must match exactly, not as a substring
        if isinstance(roles, str):
            roles = [roles]
        if isinstance(exclude, str):
            exclude = [exclude]

        own = self.attributes.values()
        if 'all' in roles:
            return [a for a in own if a.role not in exclude] + self.get_foreign_attributes()
        if 'fk' in roles:
            # IMPROVE: could be simplified if attribute references were already resolved.
            # NOTE(review): exclude is not applied in this branch - confirm that this is intended
            return [a for a in own if a.role in roles] + self.get_foreign_attributes()
        return [a for a in own if a.role in roles and a.role not in exclude]

    def get_foreign_attribute(self, name: str) -> DataVaultEntityAttribute:
        """Return an attribute that lives on a connected entity.

        Meant as an extension point for ``get_attribute``; to be implemented by
        the individual entity types. The base implementation returns ``None``.
        """
        return None

    def get_foreign_attributes(self) -> list:
        """Return the attributes that live on connected entities.

        Meant as an extension point for ``get_attributes``; to be implemented by
        the individual entity types. The base implementation returns an empty list.
        """
        return []

    def get_role_attribute(self, role: str) -> DataVaultEntityAttribute:
        """Return the first attribute with the given role (e.g. recordsource, loaddate, hashdiff), or ``None``."""
        candidates = self.get_attributes(roles=role)
        return candidates[0] if candidates else None

    def contains_pii_attributes(self) -> bool:
        """Return whether any own attribute is flagged as PII."""
        return any(a.is_pii for a in self.attributes.values())

    def get_source_entities(self, active_only: bool = True):
        """Return the source entities, found by looking up this entity as target in the mappings.

        :param active_only: when true, target mappings of type ``mappingonly`` are skipped
        :return: dict of source name to source entity
        """
        # Only entities of type = delivery should show up here
        entities = dict()
        for sourcename, mapping in self.model.get_mappings().items():
            tm = mapping.get_targetmapping_by_target(self.name)  # FIXME: possibly include role hubs here as well
            if not tm:
                continue
            if not active_only or tm.type != 'mappingonly':
                entities[sourcename] = self.model.get_entity(sourcename)
        return entities

    def validate(self) -> ErrorCollection:
        """Validate the entity; the base implementation reports no errors."""
        return ErrorCollection()

    def get_component_attributes(self, attributename: str) -> list:
        """Return the source attributes that are mapped to the given target attribute.

        :return: list of dicts with the keys ``attribute``, ``sourceentity`` and ``sourceattribute``
        """
        components = []
        for sourcename, mapping in self.model.get_mappings().items():
            tm = mapping.get_targetmapping_by_target(self.name)
            if not tm:
                continue
            for am in tm.get_attribute_mappings():
                if am.targetattribute_name != attributename:
                    continue
                srcentity = self.model.get_entity(sourcename)
                for ea in am.source.get_expression_attributes().values():
                    components.append(dict(attribute=self.get_attribute(attributename),
                                           sourceentity=srcentity,
                                           sourceattribute=ea))
        return components
class DataVaultEntityAttributeExpression:  # FIXME: is this still needed in this form? cf. Mapping > AttributeMappingExpression
    """An expression with ``{placeholder}`` markers that are resolved from components."""
    # VALUE_IND:
    #   expression:
    #     "max(case when {1} = 'IND' then {2} end)"
    #   components:
    #     1: [thp, shortname]
    #     2: [tls, value]

    def __init__(self, rule, expression: str = '', components: dict = None, resulttype: str = ''):
        """
        rule       => owning rule; its entity's model supplies the ``attribute_expression`` template
        expression => Expression, e.g.: "max(case when {placeholder1} = 'IND' then {placeholder2} end)"
        components => dict of list, e.g.: {placeholder1: [tablealias, column],
                                           placeholder2: [tablealias, column]}
                      ``None`` means "no components"
        resulttype => datatype of the expression result
        """
        self._expression = expression
        # A fresh dict per instance; a shared default dict would leak between instances
        self.components = {} if components is None else components
        self._resulttype = resulttype
        self._rule = rule

    @property
    def expression(self) -> str:
        """The raw, unparsed expression."""
        return self._expression

    def get_components(self) -> dict:
        """Return the placeholder-to-component dict."""
        return self.components

    @property
    def datatype(self) -> str:
        """The result type given at construction."""
        return self._resulttype

    def get_parsed_expression(self) -> str:
        """Return the expression with every ``{placeholder}`` replaced by its rendered component.

        Each component is rendered with the model's ``attribute_expression``
        base template (passed as ``component``).
        """
        parsed_result = self.expression
        template = self._rule.entity.model.basetemplates.get('attribute_expression')
        for placeholder, component in self.get_components().items():
            marker = '{' + str(placeholder) + '}'
            parsed_result = parsed_result.replace(marker, template.render(component=component))
        return parsed_result
class DataVaultEntityRule:
    """A named rule of an entity, wrapping the expression built from its definition."""

    def __init__(self, entity, name: str, definition: dict = None):
        """
        :param entity: owning entity
        :param name: name of the rule
        :param definition: dict with the optional keys ``expression``, ``attributes``
            and ``resulttype``; ``None`` is treated as an empty definition
        """
        # The signature offers None as default, so do not crash on the .get calls below
        definition = definition or {}
        self.entity = entity
        self.name = name
        # Missing keys fall back to the same empty values the expression class uses as defaults
        self.expression = DataVaultEntityAttributeExpression(
            self,
            definition.get('expression') or '',
            definition.get('attributes') or {},
            definition.get('resulttype') or '',
        )
class MappingSource:
    """View on an entity in its role as source of mappings; resolves targets and hash components."""

    def __init__(self, model, entity):
        self.model = model
        self.entity = entity
        self.name = entity.name

    def get_target_entity_names(self, active_only: bool = False) -> list:
        """Return the names of the entities this source is mapped to.

        :param active_only: when true, target mappings of type ``mappingonly`` are left out
        """
        if self.name not in self.model.mappings:
            return []
        targetmappings = self.model.mappings.get(self.name).get_targetmappings().values()
        return [m.targetentityname for m in targetmappings
                if not active_only or m.type != 'mappingonly']

    def get_target_entities(self) -> list:
        """Returns the list of directly mapped entities (empty when the source has no mapping)."""
        if self.name in self.model.mappings:
            return self.model.mappings.get(self.name).get_target_entities()
        return []

    def get_target_entity_hash_components(self, target) -> list:
        """Get the components for a target entity's hash key.

        Since the naming differs per source, the attribute names of the
        source are used.

        :return: list of dicts with the keys ``sourceexpression`` and ``targetattribute``
        """
        attributemappings = self.get_attribute_mappings_by_target(target.name)  # stg -> single entity
        # FIXME: the bkcc attribute should always come first.
        return [{'sourceexpression': am.source, 'targetattribute': am.target}
                for am in attributemappings]

    def get_target_entities_hash_components(self) -> dict:
        """Return, per hash attribute name, the hash attribute, its components and the target entity."""
        hash_keys = {}
        targets = self.get_target_entities()  # directly mapped targets

        for target in targets:
            if target.type in ["hub", "reference"]:
                # TODO: reference is a special case here: its key is not hashed, but exactly
                #       that makes it "susceptible" to renaming between source and target.
                hash_keys[target.key_attribute.name] = dict(
                    hashattribute=target.key_attribute,
                    components=self.get_target_entity_hash_components(target),
                    targetentity=target)

            if target.type in ["link"]:
                # For the link: attributes of the linked entities from the mapping,
                # followed by the link's own attributes from the mapping
                linkhashcomponents = []
                for le in target.get_linked_entities():
                    linkhashcomponents.extend(self.get_target_entity_hash_components(le))
                linkhashcomponents.extend(self.get_target_entity_hash_components(target))
                hash_keys[target.key_attribute.name] = dict(
                    hashattribute=target.key_attribute,
                    components=linkhashcomponents,
                    targetentity=target)

            if target.type in ["satellite"]:
                hash_keys[target.hashdiff_fk_attribute.name] = dict(
                    hashattribute=target.hashdiff_fk_attribute,
                    components=self.get_target_entity_hash_components(target),
                    targetentity=target)
                # If the referenced hub or link key is not in the list yet
                # NOTE(review): assumed to apply to satellites only - confirm the nesting
                parent_key = target.get_parent_key_attribute()
                if parent_key.name not in hash_keys:
                    hash_keys[parent_key.name] = {"hashattribute": parent_key, "components": []}

        return hash_keys

    def get_target_entities_hash_attributes(self) -> dict:
        """Return the hash attributes of all directly mapped targets, keyed by attribute name."""
        attributes = dict()
        targets = self.get_target_entities()

        for target in targets:
            if target.type in ["hub", "link"]:
                attributes[target.key_attribute.name] = target.key_attribute
            # if target.type in ["link"]:  # BUG: this mechanism makes the order deviate from the mapping.
            #     for le in target.getLinkedEntities():
            #         attributes[le.key_attribute.name] = le.key_attribute
            if target.type in ["satellite"]:
                attributes[target.hashdiff_fk_attribute.name] = target.hashdiff_fk_attribute
                # If the referenced hub or link key is not in the list yet
                # NOTE(review): assumed to apply to satellites only - confirm the nesting
                parent_key = target.get_parent_key_attribute()
                if parent_key.name not in attributes:
                    attributes[parent_key.name] = parent_key

        return attributes

    def get_mappings(self) -> dict:
        """Return what ``model.get_mapping`` yields for this source.

        NOTE(review): the result is used as an object exposing
        ``get_attribute_mappings_by_target``, so the ``dict`` annotation looks
        inaccurate - confirm against the model.
        """
        return self.model.get_mapping(self.name)

    def get_attribute_mappings_by_target(self, target_entity_name: str) -> dict:
        """Return the source-to-target attribute mappings for the given target entity."""
        return self.get_mappings().get_attribute_mappings_by_target(target_entity_name)
class DynamicProperties(object):
    """Attribute bag whose unknown attributes read as ``None`` instead of raising."""

    @classmethod
    def from_kwargs(cls, **kwargs):
        """Create an instance with one attribute per keyword argument."""
        obj = cls()
        for field, value in kwargs.items():
            setattr(obj, field, value)
        return obj

    def __getattr__(self, attr):
        """Return ``None`` for any attribute that is not set.

        Special (dunder) names still raise ``AttributeError``: protocols such
        as ``copy`` probe them with ``hasattr`` and would otherwise try to call
        the returned ``None``.
        """
        if attr.startswith('__') and attr.endswith('__'):
            raise AttributeError(attr)
        return None
# TODO: New feature: option to export an object instance to YAML