600 lines
23 KiB
Python
600 lines
23 KiB
Python
import logging
|
|
import re
|
|
from shutil import Error
|
|
import sys
|
|
import collections
|
|
from xmlrpc.client import Boolean
|
|
from rich.panel import Panel
|
|
from rich.tree import Tree
|
|
from rich import print
|
|
|
|
from jinja2 import TemplateNotFound, UndefinedError
|
|
|
|
def add_to_log_tree(tree: Tree, path: tuple, currentindex):
    """Append the remaining path segments as a chain of nested tree nodes.

    Starting at ``path[currentindex]``, every segment is converted with
    ``str`` and added as a child of the node created for the previous segment.

    :param tree: node the first segment is attached to; its ``add(label)``
        method must return the newly created child node.
    :param path: sequence of path segments.
    :param currentindex: index of the first segment to add.
    :return: the deepest node that was created, or ``tree`` itself when no
        segment is left to add.
    """
    node = tree
    position = currentindex
    while position < len(path):
        # each new segment hangs below the node added in the previous step
        node = node.add(str(path[position]))
        position += 1
    return node
|
|
|
|
def log(level: int, title: str, path: tuple, message: str, printout: bool = False):
    """Print a message as a titled panel that shows ``path`` as a nested tree.

    ``path[0]`` becomes the root of the tree, the remaining segments are
    nested below it and ``message`` is attached to the deepest node.

    NOTE: ``level`` and ``printout`` are accepted but not used by this
    implementation.

    :param level: logging level (currently unused).
    :param title: panel title; rendered with a ``[red]`` markup prefix.
    :param path: non-empty sequence of path segments.
    :param message: text attached below the last path segment.
    :param printout: currently unused.
    """
    root = Tree(str(path[0]), highlight=True)
    leaf = add_to_log_tree(root, path, 1)
    leaf.add(message)

    panel_title = "[red]" + str(title)
    panel = Panel(root, title=panel_title, padding=1, title_align="left")
    print(panel)
|
|
|
|
|
|
#def log(level: int, title: str, path: tuple, message: str, printout: bool = False):
|
|
# tree=Tree(str(path[0]))
|
|
# print(Panel(tree, title="[red]"+str(title), expand=False,padding=1 ))
|
|
# line = ""
|
|
# line = "----------------------------- " + title + " -----------------------------"
|
|
# logging.log(level, line)
|
|
# if printout:
|
|
# print(line)
|
|
# lvl = 0
|
|
# for i in path:
|
|
# if lvl == 0:
|
|
# line = " " + str(i)
|
|
# logging.log(level, line)
|
|
# if printout:
|
|
# print(line)
|
|
# else:
|
|
# line = (" " * lvl) + " |-- " + str(i)
|
|
# logging.log(level, line)
|
|
# if printout:
|
|
# print(line)
|
|
# lvl = lvl + 1
|
|
#
|
|
# line = (" " * lvl) + " |--> " + message
|
|
# logging.log(level, line )
|
|
#
|
|
# if printout:
|
|
# print(line)
|
|
# print("------------------------------------------------------------" + "-" * len(title))
|
|
|
|
|
|
class ErrorCollection:
    """Accumulates error records, each consisting of a title, a path and a message."""

    def __init__(self):
        self._errors = []

    def add(self, title, path, message):
        """Record a single error as a dict with the keys title, path and message."""
        record = {'title': title, 'path': path, 'message': message}
        self._errors.append(record)

    def append(self, errors):
        """Take over all records of another ``ErrorCollection``."""
        self._errors.extend(errors._errors)

    @property
    def count(self) -> int:
        """Number of recorded errors."""
        return len(self._errors)

    @property
    def errors(self) -> list:
        """The internal list of error records (the list itself, not a copy)."""
        return self._errors
|
|
|
|
|
|
|
|
class DBEntity:
    """Database object settings of an entity.

    The settings are the given defaults overlaid with the entity-specific
    database definition (entity-specific values win).
    """

    def __init__(self, entityname: str, entity, entitydefaults: dict, entitydbdefinition=None):
        """
        :param entityname: fallback for ``name`` when the merged definition has none.
        :param entity: the owning entity; used to reach the model's base templates.
        :param entitydefaults: default settings.
        :param entitydbdefinition: optional entity-specific overrides.
        """
        self.entity = entity

        overrides = entitydbdefinition if entitydbdefinition else {}
        self._definition = entitydefaults | overrides

        settings = self._definition
        self.name = settings.get('name', entityname)
        self.database = settings.get('database', '')
        self.schema = settings.get('schema', '')
        self.filegroup = settings.get('filegroup', '')
        self.properties = settings.get('properties', {})

    def get_qualifier(self, include_db: Boolean = True) -> str:
        """Render the 'table_qualifier' base template for this database object."""
        template = self.entity.model.basetemplates.get('table_qualifier')
        return template.render(dbentity=self, includeDB=include_db)
|
|
|
|
|
|
class DataVaultEntityAttribute(object):
    """A single attribute of an entity, built from its definition dict."""

    # TODO: consider switching the attribute definition from a list to a dict:
    #
    #   attributes:
    #       - {name: cust_no, type: 'varchar(32)'}
    #   attributes:
    #       cust_no: {type: 'varchar(32)'}
    #
    # VS Code replace pattern: search `- \{name: (.*),` and replace with `$1: {`

    # __slots__ = ('_definition', 'entity', 'name', 'datatype', 'is_mandatory',
    #              'logicalname', 'description', 'role', '_ghostvalue')

    def __init__(self, entity, definition):
        """Create the attribute from its definition.

        :param entity: owning entity; ``entity.type`` and
            ``entity.model.config.entitydefaults`` are read to determine the
            default role.
        :param definition: dict; the keys name, type, length, precision,
            scale, default, mandatory, logicalname, description, role, ghost,
            props, order and pii are read.
        :raises KeyError: if ``entity.type`` has no entry in the entity defaults.
        """
        # self.id = uuid.uuid4().hex
        self._definition = definition
        self.entity = entity
        self.name = definition.get('name', '')

        # self.datatype = definition.get('type', '')  # old: type="varchar(100)"
        self._type = definition.get('type', '')  # new: type='varchar'
        self.length = definition.get('length', '')
        self.precision = definition.get('precision', '')
        self.scale = definition.get('scale', '')
        self.default = definition.get('default', '')

        # TODO: self.datatype as a property => varchar(100)
        #       self.type as the native type

        self.is_mandatory = definition.get('mandatory', False)

        self.logicalname = definition.get('logicalname', '')  # FIXME: add to the schema
        self.description = definition.get('description', '')

        # The default role is looked up unconditionally, exactly as before.
        default_role = self.entity.model.config.entitydefaults[self.entity.type].get('attribute_role', 'base')
        self.role = definition.get('role', default_role)

        self._ghostvalue = definition.get('ghost')

        self.properties = definition.get('props', {})

        self.order = definition.get('order')
        self.is_pii = definition.get('pii', False)

    @property
    def datatype(self) -> str:  # => full datatype
        """The datatype exactly as given in the definition's ``type`` entry."""
        return self._type

    @property
    def ghostvalue(self) -> str:
        """Ghost record value of the attribute.

        An explicitly defined ``ghost`` value wins, including falsy values
        such as ``0`` or ``''``. Only when no value is defined, the configured
        ghost record value for the lower-cased native datatype is used, then
        the ``'other'`` entry, then ``''``.
        """
        if self._ghostvalue is not None:
            return self._ghostvalue

        ghostrecord = self.entity.model.config.datavault.ghostrecord
        return ghostrecord.get(self.native_datatype.lower(), ghostrecord.get('other', ''))

    @property
    def native_datatype(self) -> str:
        """Returns the native datatype expression. E.g. nvarchar

        With a parenthesised suffix the part before ``(`` is returned stripped
        and lower-cased; without one the datatype is returned unchanged.
        """
        # IMPROVE: should really be stated explicitly in the attribute definition.
        datatype = self.datatype
        paren = datatype.find('(')
        if paren == -1:
            return datatype
        return datatype[:paren].strip().lower()

    @property
    def native_datatypelength(self) -> str:
        """Returns the native datatype length. E.g. nvarchar(100) -> 100

        Returns ``''`` when the datatype contains no ``(``.
        """
        # IMPROVE: should really be stated explicitly in the attribute definition.
        datatype = self.datatype
        paren = datatype.find('(')
        if paren == -1:
            return ''
        return datatype[paren + 1:datatype.find(')')].strip().lower()

    @property
    def column_definition(self) -> str:
        """Returns the column definition, based on the configured template."""
        return self.entity.model.basetemplates.get('column_ddl').render(attribute=self)

    def copy(self, newname: str = ''):
        """Create a new attribute from the same entity and definition.

        :param newname: if given, the name of the copy is set to it.
        :return: a new ``DataVaultEntityAttribute`` built from the definition;
            attribute values changed after construction are not carried over.
        """
        duplicate = DataVaultEntityAttribute(self.entity, self._definition)
        if newname:
            duplicate.name = newname
        return duplicate

    def validate(self, spec):
        """Check the attribute's datatype against the patterns of a specification.

        :param spec: key into ``model.sys_specifications``; a falsy value
            skips validation.
        :return: an ``ErrorCollection``; it holds one validation error when the
            datatype matches none of the specification's datatype patterns.
        """
        errors = ErrorCollection()
        if not spec:
            return errors

        logging.debug('Validating attribute <%s>', self.name)

        datatypes = self.entity.model.sys_specifications[spec]['datatypes']
        is_valid = any(
            re.search(definition.get('pattern'), self._type, re.MULTILINE | re.IGNORECASE)
            for definition in datatypes.values()
        )

        if not is_valid:
            logging.debug('datatype <%s> of attribute <%s> not valid', self._type, self.name)

            errors.add("VALIDATION ERROR",
                       (self.entity.filename, "Attribute", "<" + self.name + ">"),
                       f'Datatype <{self._type}> not valid (not matching any pattern in {spec})')

        return errors
|
|
|
|
|
|
class DerivedAttribute(DataVaultEntityAttribute):
    """Subclass of ``DataVaultEntityAttribute`` that adds no members of its own."""
|
|
|
|
|
|
class GeneratorEntity:
    """Base class for model elements that are described by a definition dict
    and rendered through the model's template environment."""

    def __init__(self, model, filename: str, definition: dict = None):
        """
        :param model: the owning model; provides the template environment and
            the type/subtype catalogues.
        :param filename: name of the file the definition comes from.
        :param definition: definition dict; ``None`` is treated as an empty
            definition.
        """
        # The declared default is None; without this guard every `.get` below
        # would raise AttributeError.
        definition = {} if definition is None else definition

        # logging.info('Creating Entity %s',definition['name'])
        self.model = model
        self.filename = filename
        self._definition = definition
        self.id = definition.get('name')
        self.name = definition.get('name')
        self.type = definition.get('type')
        self.subtype = definition.get('subtype', 'base')
        self.description = definition.get('description', '')
        self.generate = definition.get('generate', 1)
        self.extra = definition.get('extra', {})
        self._sql_pre_hook = definition.get('sql_pre_hook', '')
        self._sql_post_hook = definition.get('sql_post_hook', '')

    @property
    def type_display_name(self) -> str:
        """The 'displayname' of the entity's type as listed by ``model.get_types()``."""
        return self.model.get_types().get(self.type).get('displayname')

    @property
    def subtype_display_name(self) -> str:
        """The 'displayname' of the entity's subtype as listed by ``model.get_subtypes()``."""
        return self.model.get_subtypes().get(self.subtype).get('displayname')

    def render_template(self, templatefilename: str):
        """Render the entity with the given template and return the result as a string.

        The template receives ``entity``, ``templatename`` and an empty
        ``templateversion``. If the template is not found or references an
        undefined name, the error is printed and logged and the process exits
        with status 2.
        """
        try:
            template = self.model.templateEnvironment.get_template(templatefilename)
            # print(self.model.templateEnvironment.loader.get_source(self.model.templateEnvironment, templatefilename))

            # checksum = hashlib.md5(str(template).encode()).hexdigest().upper()
            output = template.render(
                entity=self,
                templatename=templatefilename,
                templateversion=''
            )

        except TemplateNotFound:
            print("")
            print(Panel(f"[red]Error while rendering entity-templates[/red]: Template {templatefilename} not found.", title="[red]RENDER ERROR", padding=1, title_align="left"))
            logging.error(f"Template {templatefilename} not found.")
            # print(f"Template {templatefilename} not found.")

            sys.exit(2)
        except UndefinedError as e:
            print("")
            logging.error(f"Error while rendering entity {self.name} :")
            logging.error(e)
            print(f"Error while rendering entity {self.name} :", e)
            sys.exit(2)
        return output

    @property
    def sql_pre_hook(self) -> str:
        """The 'sql_pre_hook' definition rendered as a template with ``this`` bound to the entity."""
        return self.model.templateEnvironment.from_string(self._sql_pre_hook).render(this=self)

    @property
    def sql_post_hook(self) -> str:
        """The 'sql_post_hook' definition rendered as a template with ``this`` bound to the entity."""
        return self.model.templateEnvironment.from_string(self._sql_post_hook).render(this=self)

    def get_component_entities(self):
        """Return the component entities; the base implementation has none."""
        return []
|
|
|
|
|
|
class Layer(GeneratorEntity):
    """A model layer; its settings are read from ``model.config.layer`` by layer id."""

    def __init__(self, model, layerid, definition):
        """
        :param model: the owning model.
        :param layerid: key of the layer in ``model.config.layer``.
        :param definition: definition dict of the layer.
        """
        GeneratorEntity.__init__(self, model, '', definition)
        # the layer id replaces the id the base class derived from the definition
        self.id = layerid

    @property
    def _layer_config(self):
        """The configuration entry of this layer in ``model.config.layer``."""
        return self.model.config.layer.get(self.id)

    @property
    def _default_dbobject(self):
        """The 'defaultdatabaseobject' section of the layer configuration."""
        return self._layer_config.get('defaultdatabaseobject')

    @property
    def database(self) -> str:
        """Default database of the layer."""
        return self._default_dbobject.get('database')

    @property
    def schema(self) -> str:
        """Default schema of the layer."""
        return self._default_dbobject.get('schema')

    @property
    def filegroup(self) -> str:
        """Default filegroup of the layer."""
        return self._default_dbobject.get('filegroup')

    @property
    def sys_specification(self) -> str:
        """The layer's 'sys_specification' setting, or '' when not configured."""
        return self._layer_config.get('sys_specification', '')

    @property
    def connection_name(self) -> str:
        """The layer's 'connectionname' setting, or '' when not configured."""
        return self._layer_config.get('connectionname', '')

    def get_entities(self) -> dict:
        """Return the model's entities whose layer equals this layer, keyed as in ``model.entities``."""
        return {k: v for k, v in self.model.entities.items() if v.layer == self}

    def get_entity_count(self):
        """Return the number of model entities whose layer equals this layer."""
        return sum(1 for e in self.model.entities.values() if e.layer == self)
|
|
|
|
|
|
class DataVaultEntity(GeneratorEntity):
    """Generator entity that owns an ordered set of attributes and belongs to a layer."""

    def __init__(self, model, filename: str, definition: dict = None):
        """Create the entity and its attributes.

        The common attributes configured for the entity type are added first
        (unless listed under 'exclude_commonattributes'), followed by the
        attributes of the definition itself.
        """
        GeneratorEntity.__init__(self, model, filename, definition)
        # logging.info('Creating Entity %s',definition['name'])

        self._layername = definition.get('layer', self.model.config.entitydefaults[self.type]['layer'])

        self.attributes = collections.OrderedDict()

        excluded_common = definition.get('exclude_commonattributes', [])
        type_defaults = self.model.config.entitydefaults.get(self.type, {})
        for attribute_rolename in type_defaults.get('attributes', []):
            if attribute_rolename in excluded_common:
                continue
            attr = DataVaultEntityAttribute(self, self.model.config.commonattributes.get(attribute_rolename))
            # common attributes carry the name of their configuration entry as role
            attr.role = attribute_rolename
            self.add_attribute(attr)

        for attrdef in definition.get('attributes', []):
            self.add_attribute(DataVaultEntityAttribute(self, attrdef))

    @property
    def layer(self) -> Layer:
        """Returns the entity layer."""
        return self.model.get_layer(self._layername)

    @property
    def dbentity(self) -> DBEntity:
        """Database object of the entity: the layer's default database object
        overlaid with the entity's own 'dbentity' definition."""
        return DBEntity(self.name,
                        self,
                        self.model.config.layer.get(self._layername, {}).get('defaultdatabaseobject', {}),
                        self._definition.get('dbentity'))

    def add_attribute(self, attribute: DataVaultEntityAttribute):
        """Add an attribute to the entity; an existing attribute of the same name is replaced."""
        self.attributes[attribute.name] = attribute

    def get_attribute(self, name: str) -> DataVaultEntityAttribute:
        """Get an attribute by name; falls back to ``get_foreign_attribute``."""
        # Only consult the foreign lookup when the entity has no such attribute.
        if name in self.attributes:
            return self.attributes[name]
        return self.get_foreign_attribute(name)

    def get_attributes(self, roles: list = 'all', exclude: list = ()) -> list[DataVaultEntityAttribute]:
        """Return the attributes for one or more roles.

        :param roles: role name or list of role names. ``'all'`` selects every
            attribute; ``'all'`` and ``'fk'`` additionally append the foreign
            attributes.
        :param exclude: role name or list of role names to leave out of the
            entity's own attributes.
        """
        # A bare string would otherwise be matched by substring
        # (e.g. role 'hash' would be "in" 'hashdiff').
        if isinstance(roles, str):
            roles = [roles]
        if isinstance(exclude, str):
            exclude = [exclude]

        own = [a for a in self.attributes.values() if a.role not in exclude]

        if 'all' in roles:
            return own + self.get_foreign_attributes()

        selected = [a for a in own if a.role in roles]
        if 'fk' in roles:
            # IMPROVE: could be simplified if attribute references were already resolved.
            return selected + self.get_foreign_attributes()
        return selected

    def get_foreign_attribute(self, name: str) -> DataVaultEntityAttribute:
        """Return an attribute that lives on a connected entity.

        Extension point for ``get_attribute``; to be implemented by the
        concrete entity types. The base implementation returns ``None``.
        """
        return None

    def get_foreign_attributes(self) -> list:
        """Return the attributes that live on connected entities.

        Extension point for ``get_attributes``; to be implemented by the
        concrete entity types. The base implementation returns an empty list.
        """
        return []

    def get_role_attribute(self, role: str) -> DataVaultEntityAttribute:
        """Return the first attribute with the given role (e.g. recordsource,
        loaddate, hashdiff as configured in the config), or ``None``."""
        matches = self.get_attributes(roles=role)
        return matches[0] if matches else None

    def contains_pii_attributes(self) -> Boolean:
        """Return True if at least one of the entity's own attributes is flagged as PII."""
        return any(a.is_pii for a in self.attributes.values())

    def get_source_entities(self, active_only: Boolean = True):
        """Return the source entities, found by looking up this entity as target in the mappings.

        :param active_only: if true, sources whose target mapping is of type
            'mappingonly' are left out.
        :return: dict of source name to source entity.
        """
        # Only entities of type 'delivery' should show up here.
        entities = dict()

        for sourcename, mapping in self.model.get_mappings().items():
            tm = mapping.get_targetmapping_by_target(self.name)  # FIXME: possibly include role hubs here as well
            if not tm:
                continue
            if not active_only or tm.type != 'mappingonly':
                entities[sourcename] = self.model.get_entity(sourcename)

        return entities

    def validate(self) -> ErrorCollection:
        """Validate the entity; the base implementation reports no errors."""
        return ErrorCollection()

    def get_component_attributes(self, attributename: str) -> list:
        """Return the source attributes that are mapped onto the given attribute.

        :return: list of dicts with the keys attribute, sourceentity and
            sourceattribute; one entry per expression attribute of every
            attribute mapping that targets ``attributename``.
        """
        components = []
        for sourcename, mapping in self.model.get_mappings().items():
            tm = mapping.get_targetmapping_by_target(self.name)
            if not tm:
                continue
            for am in tm.get_attribute_mappings():
                if am.targetattribute_name != attributename:
                    continue
                srcentity = self.model.get_entity(sourcename)
                for ea in am.source.get_expression_attributes().values():
                    components.append(dict(attribute=self.get_attribute(attributename),
                                           sourceentity=srcentity,
                                           sourceattribute=ea
                                           ))

        return components
|
|
|
|
|
|
|
|
class DataVaultEntityAttributeExpression:  # FIXME: is this still needed in this form? cf. Mapping > AttributeMappingExpression
    """An expression with placeholders that are replaced by rendered components."""

    # VALUE_IND:
    #   expression:
    #     "max(case when {1} = 'IND' then {2} end)"
    #   components:
    #     1: [thp, shortname]
    #     2: [tls, value]

    def __init__(self, rule, expression: str = '', components: dict = None, resulttype: str = ''):
        """
        :param rule: owning rule; ``rule.entity.model.basetemplates`` supplies
            the 'attribute_expression' template used for parsing.
        :param expression: expression, e.g.:
            "max(case when {placeholder1} = 'IND' then {placeholder2} end)"
        :param components: dict of list, e.g.:
            {placeholder1: [tablealias, column],
             placeholder2: [tablealias, column]}
            ``None`` (the default) means no components.
        :param resulttype: datatype of the expression result.
        """
        self._expression = expression
        # A fresh dict per instance: a shared mutable default would leak
        # components between instances, and None would break iteration.
        self.components = {} if components is None else components
        self._resulttype = resulttype
        self._rule = rule

    @property
    def expression(self) -> str:
        """The raw expression, placeholders included."""
        return self._expression

    def get_components(self) -> dict:
        """Return the placeholder-to-component dict."""
        return self.components

    @property
    def datatype(self) -> str:
        """The result type given at construction."""
        return self._resulttype

    def get_parsed_expression(self) -> str:
        """Return the expression with every ``{placeholder}`` replaced by the
        component rendered through the 'attribute_expression' base template."""
        parsed_result = self.expression
        template = self._rule.entity.model.basetemplates.get('attribute_expression')

        for placeholder, component in self.get_components().items():
            parsed_result = parsed_result.replace('{' + str(placeholder) + '}', template.render(component=component))

        return parsed_result
|
|
|
|
|
|
class DataVaultEntityRule:
    """A named rule of an entity, holding the expression built from its definition."""

    def __init__(self, entity, name: str, definition: dict = None):
        """
        :param entity: the entity the rule belongs to.
        :param name: name of the rule.
        :param definition: dict; the keys expression, attributes and
            resulttype are read. ``None`` is treated as an empty definition.
        """
        # The declared default is None; guard it so `.get` below cannot fail.
        definition = {} if definition is None else definition

        self.entity = entity
        self.name = name
        self.expression = DataVaultEntityAttributeExpression(
            self,
            definition.get('expression'),
            # a missing 'attributes' entry means "no components", not None
            definition.get('attributes') or {},
            definition.get('resulttype'),
        )
|
|
|
|
|
|
class MappingSource:
    """View on an entity in its role as the source of mappings."""

    def __init__(self, model, entity):
        """
        :param model: the model that holds the mappings.
        :param entity: the source entity; its name is the key into the mappings.
        """
        self.model = model
        self.entity = entity
        self.name = entity.name

    def get_target_entity_names(self, active_only: Boolean = False) -> list:
        """Return the names of the target entities of this source's mapping.

        :param active_only: if true, targets whose target mapping is of type
            'mappingonly' are left out.
        :return: list of target entity names; empty if the source has no mapping.
        """
        if self.name not in self.model.mappings:
            return []

        targetmappings = self.model.mappings.get(self.name).get_targetmappings().values()
        # Previously both branches appended, so active_only never filtered anything.
        return [m.targetentityname
                for m in targetmappings
                if not active_only or m.type != 'mappingonly']

    def get_target_entities(self) -> list:
        """Return the directly mapped entities; empty if the source has no mapping."""
        if self.name in self.model.mappings:
            return self.model.mappings.get(self.name).get_target_entities()
        return []

    def get_target_entity_hash_components(self, target) -> list:
        """Get the components for a target entity's hash key.

        Since the naming differs for each source, the attribute names of the
        source are used.

        :return: list of dicts with the keys sourceexpression and
            targetattribute, one per attribute mapping to ``target``.
        """
        attributemappings = self.get_attribute_mappings_by_target(target.name)  # staging -> single entity

        # FIXME: the bkcc attribute should always come first.
        return [{'sourceexpression': am.source,
                 'targetattribute': am.target
                 }
                for am in attributemappings]

    def get_target_entities_hash_components(self) -> dict:
        """Return, per hash attribute name, the hash attribute and its components
        for all directly mapped targets.

        Entries for hubs, references, links and satellites carry the keys
        hashattribute, components and targetentity; the entry added for a
        satellite's parent key carries hashattribute and an empty components
        list only.
        """
        hash_keys = {}
        targets = self.get_target_entities()  # directly mapped targets

        for target in targets:

            # TODO: 'reference' is a special case here: its key is not hashed,
            #       which makes it sensitive to renaming between source and target.
            if target.type in ["hub", "reference"]:
                hash_keys[target.key_attribute.name] = dict(hashattribute=target.key_attribute,
                                                            components=self.get_target_entity_hash_components(target),
                                                            targetentity=target)

            if target.type in ["link"]:
                # For the link: attributes from the mapping plus the linked
                # entities' attributes from the mapping.
                linkhashcomponents = []
                for le in target.get_linked_entities():
                    linkhashcomponents.extend(self.get_target_entity_hash_components(le))

                linkhashcomponents.extend(self.get_target_entity_hash_components(target))

                hash_keys[target.key_attribute.name] = dict(hashattribute=target.key_attribute,
                                                            components=linkhashcomponents,
                                                            targetentity=target)

            if target.type in ["satellite"]:
                hash_keys[target.hashdiff_fk_attribute.name] = dict(hashattribute=target.hashdiff_fk_attribute,
                                                                    components=self.get_target_entity_hash_components(
                                                                        target),
                                                                    targetentity=target)
                # If the referenced hub or link key is not in the list yet
                parent_key = target.get_parent_key_attribute()
                if parent_key.name not in hash_keys:
                    hash_keys[parent_key.name] = {
                        "hashattribute": parent_key, "components": []}

        return hash_keys

    def get_target_entities_hash_attributes(self) -> dict:
        """Return the hash attributes of all directly mapped targets, keyed by attribute name."""
        attributes = dict()
        targets = self.get_target_entities()

        for target in targets:
            if target.type in ["hub", "link"]:
                attributes[target.key_attribute.name] = target.key_attribute

            # if target.type in ["link"]:  # BUG: this mechanism makes the order deviate from the mapping.
            #     for le in target.getLinkedEntities():
            #         attributes[le.key_attribute.name] = le.key_attribute

            if target.type in ["satellite"]:
                attributes[target.hashdiff_fk_attribute.name] = target.hashdiff_fk_attribute
                # If the referenced hub or link key is not in the list yet
                parent_key = target.get_parent_key_attribute()
                if parent_key.name not in attributes:
                    attributes[parent_key.name] = parent_key

        return attributes

    def get_mappings(self) -> dict:
        """Return what ``model.get_mapping`` yields for this source's name."""
        return self.model.get_mapping(self.name)

    def get_attribute_mappings_by_target(self, target_entity_name: str) -> dict:
        """Return the source-to-target attribute mappings for the given target entity."""
        return self.get_mappings().get_attribute_mappings_by_target(target_entity_name)
|
|
|
|
class DynamicProperties(object):
    """Object with freely assignable attributes; attributes that were never set read as ``None``."""

    @classmethod
    def from_kwargs(cls, **kwargs):
        """Create an instance and set one attribute per keyword argument."""
        instance = cls()
        for name in kwargs:
            setattr(instance, name, kwargs[name])
        return instance

    def __getattr__(self, attr):
        # Only called when normal attribute lookup fails: unknown names yield None.
        return None
|
|
|
|
# TODO: New feature: option to export an object instance to YAML
|
|
|