snapshot current state before gitea sync

This commit is contained in:
2026-02-18 10:50:24 +01:00
commit 93a3f9e6fe
59 changed files with 4540 additions and 0 deletions
BIN
View File
Binary file not shown.
+599
View File
@@ -0,0 +1,599 @@
import logging
import re
from shutil import Error
import sys
import collections
from xmlrpc.client import Boolean
from rich.panel import Panel
from rich.tree import Tree
from rich import print
from jinja2 import TemplateNotFound, UndefinedError
def add_to_log_tree(tree: Tree, path: tuple, currentindex):
    """Append the remaining *path* elements (from *currentindex* on) to *tree*
    as a chain of nested child nodes and return the deepest node created.

    Iterative equivalent of the former recursive walk.
    """
    node = tree
    for element in path[currentindex:]:
        node = node.add(str(element))
    return node
def log(level: int, title: str, path: tuple, message: str, printout: bool = False):
    """Print *message* beneath the nested *path* as a rich tree inside a red-titled panel.

    NOTE(review): ``level`` and ``printout`` are currently unused — the
    commented-out predecessor below also wrote to ``logging``; confirm whether
    that behavior should be restored.
    """
    tree=Tree(str(path[0]), highlight=True)
    last = add_to_log_tree(tree ,path, 1)
    last.add(message)
    print(Panel(tree, title="[red]"+str(title), padding=1,title_align="left" ))
#def log(level: int, title: str, path: tuple, message: str, printout: bool = False):
# tree=Tree(str(path[0]))
# print(Panel(tree, title="[red]"+str(title), expand=False,padding=1 ))
# line = ""
# line = "----------------------------- " + title + " -----------------------------"
# logging.log(level, line)
# if printout:
# print(line)
# lvl = 0
# for i in path:
# if lvl == 0:
# line = " " + str(i)
# logging.log(level, line)
# if printout:
# print(line)
# else:
# line = (" " * lvl) + " |-- " + str(i)
# logging.log(level, line)
# if printout:
# print(line)
# lvl = lvl + 1
#
# line = (" " * lvl) + " |--> " + message
# logging.log(level, line )
#
# if printout:
# print(line)
# print("------------------------------------------------------------" + "-" * len(title))
class ErrorCollection:
    """Accumulates validation errors as plain dicts (title, path, message)."""

    def __init__(self):
        # Internal error records; exposed read-only through ``errors``.
        self._errors = []

    def add(self, title, path, message):
        """Record a single error."""
        self._errors.append({'title': title, 'path': path, 'message': message})

    def append(self, errors):
        """Merge all errors of another ErrorCollection into this one."""
        self._errors.extend(errors._errors)

    @property
    def count(self) -> int:
        """Number of collected errors."""
        return len(self._errors)

    @property
    def errors(self) -> list:
        """The raw list of error dicts."""
        return self._errors
class DBEntity:
    """Physical database representation (name/database/schema/filegroup) of a
    model entity.

    The effective definition is the layer's defaults merged with — and
    overridden by — the entity-specific ``dbentity`` definition.
    """

    def __init__(self, entityname: str, entity, entitydefaults: dict, entitydbdefinition=None):
        """
        :param entityname: fallback physical name if the definition has no 'name'
        :param entity: the owning model entity (used for template rendering)
        :param entitydefaults: layer-level default database object settings
        :param entitydbdefinition: entity-specific overrides (may be None)
        """
        self.entity = entity
        # dict union: entity-specific keys win over the layer defaults
        self._definition = entitydefaults | (entitydbdefinition or {})
        self.name = self._definition.get('name', entityname)
        self.database = self._definition.get('database', '')
        self.schema = self._definition.get('schema', '')
        self.filegroup = self._definition.get('filegroup', '')
        self.properties = self._definition.get('properties', {})

    def get_qualifier(self, include_db: bool = True) -> str:
        """returns rendered Qualifier

        Annotation fixed to the ``bool`` builtin; ``xmlrpc.client.Boolean`` is
        merely a deprecated alias for ``bool`` and misleading here.
        """
        return self.entity.model.basetemplates.get('table_qualifier').render(dbentity=self, includeDB=include_db)
class DataVaultEntityAttribute(object):
    """A single attribute (column) of a Data Vault entity.

    Wraps the raw YAML attribute definition and exposes datatype helpers,
    the ghost-record value and the rendered DDL column definition.
    """
    #TODO: possibly switch the attribute definition from a list to a dict:
    # attributes:
    #   - {name: cust_no, type: 'varchar(32)'}
    # attributes:
    #   cust_no: {type: 'varchar(32)'}
    #
    # VSCODE replace pattern: search: - \{name: (.*), replace with: $1: {
    # __slots__ = ('_definition'
    #             ,'entity'
    #             ,'name'
    #             ,'datatype'
    #             ,'is_mandatory'
    #             ,'logicalname'
    #             ,'description'
    #             ,'role'
    #             ,'_ghostvalue' )

    def __init__(self, entity, definition):
        """
        :param entity: the owning entity (provides model/config access)
        :param definition: raw attribute definition dict from the model file
        """
        #self.id = uuid.uuid4().hex
        self._definition = definition
        self.entity = entity
        self.name = definition.get('name', '')
        #self.datatype = definition.get('type', '') # Old: type="varchar(100)"
        self._type = definition.get('type', '') # New: type='varchar'
        self.length = definition.get('length', '')
        self.precision = definition.get('precision', '')
        self.scale = definition.get('scale', '')
        self.default = definition.get('default', '')
        #TODO: self.datatype as property => varchar(100)
        #      self.type as the native type
        self.is_mandatory = definition.get('mandatory', False)
        self.logicalname = definition.get('logicalname', '') #fixme: add to the schema
        self.description = definition.get('description', '')
        # Role falls back to the configured default attribute role of the
        # owning entity's type (e.g. 'base').
        self.role = definition.get('role',
                                   self.entity.model.config.entitydefaults[self.entity.type].get('attribute_role','base'))
        self._ghostvalue = definition.get('ghost')
        self.properties = definition.get('props', {})
        self.order = definition.get('order')
        self.is_pii = definition.get('pii', False)

    @property
    def datatype(self) -> str: # => Full datatype
        """Full datatype string as defined, e.g. 'varchar(100)'."""
        return self._type

    @property
    def ghostvalue(self) -> str:
        """Ghost-record value: the explicit 'ghost' definition if set, else the
        configured value for the native datatype, else the 'other' fallback."""
        if not self._ghostvalue:
            return self.entity.model.config.datavault.ghostrecord.get(self.native_datatype.lower(),
                                                                      self.entity.model.config.datavault.ghostrecord.get('other', ''))
        else:
            return self._ghostvalue

    @property
    def native_datatype(self) -> str:
        """Returns the native datatype expression. E.g. nvarchar"""
        return self.datatype[:self.datatype.find('(')].strip().lower() if self.datatype.find(
            '(') != -1 else self.datatype
        # IMPROVE: should really be stated explicitly in the attribute definition.

    @property
    def native_datatypelength(self) -> str:
        """Returns the native datatype length. E.g. nvarchar(100) -> 100 """
        return self.datatype[self.datatype.find('(') + 1:self.datatype.find(')')].strip().lower() if self.datatype.find('(') != -1 else ''
        # IMPROVE: should really be stated explicitly in the attribute definition.

    @property
    def column_definition(self) -> str:
        """Returns the columndefinition, based on the configured template."""
        return self.entity.model.basetemplates.get('column_ddl').render(attribute=self)

    def copy(self, newname: str = ''):
        """Return a copy built from the same raw definition; optionally rename it."""
        if newname:
            copy = DataVaultEntityAttribute(self.entity, self._definition)
            copy.name = newname
            return copy
        else:
            return DataVaultEntityAttribute(self.entity, self._definition)

    def validate(self, spec):
        """Validate the attribute datatype against the datatype patterns of the
        given system specification; returns an ErrorCollection (empty when the
        type matches any pattern or no spec is given)."""
        errors = ErrorCollection()
        if not spec:
            return errors
        logging.debug('Validating attribute <%s>',self.name)
        is_valid = False
        # the type is valid as soon as ANY configured pattern matches
        for datatype, definition in self.entity.model.sys_specifications[spec]['datatypes'].items():
            matches = re.findall(definition.get('pattern'), self._type, re.MULTILINE | re.IGNORECASE)
            for m in matches:
                is_valid = True
        if not is_valid:
            logging.debug('datatype <%s> of attribute <%s> not valid',self._type, self.name)
            errors.add("VALIDATION ERROR",
                       (self.entity.filename,"Attribute", "<" + self.name + ">"),
                       f'Datatype <{self._type}> not valid (not matching any pattern in {spec})')
        return errors
class DerivedAttribute(DataVaultEntityAttribute):
    """Marker subclass for derived attributes; currently adds no behavior."""
    pass
class GeneratorEntity:
    """Base class for every generated model object (entities, layers, tasks).

    Holds the raw YAML definition plus common metadata and provides template
    rendering for code generation.
    """

    def __init__(self, model, filename: str, definition: dict = None):
        """
        :param model: owning model (template environment, config, lookups)
        :param filename: model file the definition was loaded from
        :param definition: raw YAML definition dict
        """
        # logging.info('Creating Entity %s',definition['name'])
        self.model = model
        self.filename = filename
        self._definition = definition
        self.id = definition.get('name')
        self.name = definition.get('name')
        self.type = definition.get('type')
        self.subtype = definition.get('subtype', 'base')
        self.description = definition.get('description', '')
        # generate flag: truthy => entity participates in generation
        self.generate = definition.get('generate', 1)
        self.extra = definition.get('extra', {})
        # raw jinja snippets; rendered lazily by the *_hook properties below
        self._sql_pre_hook = definition.get('sql_pre_hook', '')
        self._sql_post_hook = definition.get('sql_post_hook', '')

    @property
    def type_display_name(self) -> str:
        """Configured display name of the entity type."""
        return self.model.get_types().get(self.type).get('displayname')

    @property
    def subtype_display_name(self) -> str:
        """Configured display name of the entity subtype."""
        return self.model.get_subtypes().get(self.subtype).get('displayname')

    def render_template(self, templatefilename: str):
        """Render the entity with the given template and return the result as a string.

        Exits the process (exit code 2) if the template is missing or uses
        undefined variables.
        """
        try:
            template = self.model.templateEnvironment.get_template(templatefilename)
            #print( self.model.templateEnvironment.loader.get_source( self.model.templateEnvironment, templatefilename))
            #checksum = hashlib.md5(str(template).encode()).hexdigest().upper()
            output = template.render(
                entity=self,
                templatename=templatefilename,
                templateversion=''
            )
        except TemplateNotFound:
            print("")
            print(Panel(f"[red]Error while rendering entity-templates[/red]: Template {templatefilename} not found.", title="[red]RENDER ERROR", padding=1,title_align="left" ))
            logging.error(f"Template {templatefilename} not found.")
            #print(f"Template {templatefilename} not found.")
            sys.exit(2)
        except UndefinedError as e:
            print("")
            logging.error(f"Error while rendering entity {self.name} :")
            logging.error(e)
            print(f"Error while rendering entity {self.name} :", e)
            sys.exit(2)
        return output

    @property
    def sql_pre_hook(self) -> str:
        """The sql_pre_hook snippet rendered through jinja (``this`` = entity)."""
        return self.model.templateEnvironment.from_string(self._sql_pre_hook).render(this=self)

    @property
    def sql_post_hook(self) -> str:
        """The sql_post_hook snippet rendered through jinja (``this`` = entity)."""
        return self.model.templateEnvironment.from_string(self._sql_post_hook).render(this=self)

    def get_component_entities(self):
        """Entities this entity depends on; overridden by subclasses."""
        return []
class Layer(GeneratorEntity):
    """A model layer; its physical defaults come from ``model.config.layer``."""

    def __init__(self, model, layerid, definition):
        GeneratorEntity.__init__(self, model, '', definition)
        self.id = layerid

    def _config(self):
        # Configuration block of this layer from the global configuration.
        return self.model.config.layer.get(self.id)

    @property
    def database(self) -> str:
        """Default database for objects of this layer."""
        return self._config().get('defaultdatabaseobject').get('database')

    @property
    def schema(self) -> str:
        """Default schema for objects of this layer."""
        return self._config().get('defaultdatabaseobject').get('schema')

    @property
    def filegroup(self) -> str:
        """Default filegroup for objects of this layer."""
        return self._config().get('defaultdatabaseobject').get('filegroup')

    @property
    def sys_specification(self) -> str:
        """Name of the system specification used for datatype validation."""
        return self._config().get('sys_specification','')

    @property
    def connection_name(self) -> str:
        """Name of the configured database connection."""
        return self._config().get('connectionname','')

    def get_entities(self) -> list:
        """All model entities assigned to this layer, keyed by name.

        NOTE: returns a dict despite the historical ``list`` annotation.
        """
        members = {}
        for key, candidate in self.model.entities.items():
            if candidate.layer == self:
                members[key] = candidate
        return members

    def get_entity_count(self):
        """Number of entities assigned to this layer."""
        counter = 0
        for candidate in self.model.entities.values():
            if candidate.layer == self:
                counter += 1
        return counter
class DataVaultEntity(GeneratorEntity):
    """Base class for Data Vault entities (hubs, links, satellites, ...).

    Builds the attribute dict from the configured common attributes of the
    entity type plus the attributes listed in the definition.
    """

    def __init__(self, model, filename: str, definition: dict = None):
        GeneratorEntity.__init__(self, model, filename, definition)
        # logging.info('Creating Entity %s',definition['name'])
        self._layername = definition.get('layer', self.model.config.entitydefaults[self.type]['layer'])
        self.attributes = collections.OrderedDict()
        # common attributes configured for this entity type, minus explicit excludes
        excludecommonattribute = definition.get('exclude_commonattributes', [])
        commonattributes = (a for a in self.model.config.entitydefaults.get(self.type, {}).get('attributes', []) if a not in excludecommonattribute )
        for attribute_rolename in commonattributes:
            attr = DataVaultEntityAttribute(self, self.model.config.commonattributes.get(attribute_rolename))
            attr.role = attribute_rolename
            self.add_attribute(attr)
        # entity-specific attributes from the definition
        for attrdef in definition.get('attributes',[]):
            self.add_attribute(DataVaultEntityAttribute(self, attrdef))

    @property
    def layer(self) -> Layer:
        """Returns the entity layer."""
        return self.model.get_layer(self._layername)

    @property
    def dbentity(self) -> DBEntity:
        """Physical database object (layer defaults merged with the entity's
        own 'dbentity' overrides)."""
        return DBEntity(self.name,
                        self,
                        self.model.config.layer.get(self._layername, {}).get('defaultdatabaseobject', {}),
                        self._definition.get('dbentity'))

    def add_attribute(self, attribute: DataVaultEntityAttribute):
        """add an attribute to the entity."""
        self.attributes[attribute.name] = attribute

    def get_attribute(self, name: str) -> DataVaultEntityAttribute:
        """get attribute by name; falls back to foreign (connected-entity) attributes."""
        return self.attributes.get(name, self.get_foreign_attribute(name))

    def get_attributes(self, roles: list = 'all', exclude: list = ()) -> list[DataVaultEntityAttribute]:
        """returns a list of attributes for one or more given roles. You can exclude certain attribute-roles"""
        if 'all' in roles:
            return [a for a in self.attributes.values() if a.role not in exclude] + self.get_foreign_attributes()
        elif 'fk' in roles: # IMPROVE: could be simplified once attribute references are already resolved.
            return [a for a in self.attributes.values() if a.role in roles] + self.get_foreign_attributes()
        else:
            return [a for a in self.attributes.values() if a.role in roles and a.role not in exclude]

    def get_foreign_attribute(self, name: str) -> DataVaultEntityAttribute:
        # Returns an attribute that lives on a connected entity. Extension hook
        # for get_attribute; must be implemented by the respective entities.
        pass

    def get_foreign_attributes(self) -> DataVaultEntityAttribute:
        # Returns a list of attributes that live on connected entities.
        # Extension hook for get_attributes; must be implemented by the
        # respective entities.
        return []

    def get_role_attribute(self, role: str) -> DataVaultEntityAttribute:
        # returns a specific role-attribute (recordsource, loaddate, hashdiff) as configured in config
        try:
            return self.get_attributes(roles=role)[0]
        except IndexError:
            return None

    def contains_pii_attributes(self) -> Boolean:
        """True if any attribute is flagged as personally identifiable (pii)."""
        return any([True for a in self.attributes.values() if a.is_pii ])

    def get_source_entities(self, active_only: Boolean = True):
        """returns list of source entities by lookup of the target in the mapping-definition"""
        # Only entities of type = delivery should appear here
        entities = dict()
        for sourcename, mapping in self.model.get_mappings().items():
            tm = mapping.get_targetmapping_by_target(self.name) # FIXME: possibly include role-hubs here as well:
            if tm:
                if active_only and tm.type != 'mappingonly':
                    entities[sourcename] = self.model.get_entity(sourcename)
                elif not active_only:
                    entities[sourcename] = self.model.get_entity(sourcename)
        return entities

    def validate(self) -> ErrorCollection:
        """Default validation: no checks; subclasses override."""
        return ErrorCollection()

    def get_component_attributes(self, attributename: str) -> list:
        """For the given target attribute, collect every (source entity,
        source attribute) pair contributing to it via the mappings."""
        components = []
        for sourcename, mapping in self.model.get_mappings().items():
            tm = mapping.get_targetmapping_by_target(self.name)
            if tm:
                for am in tm.get_attribute_mappings():
                    if am.targetattribute_name == attributename:
                        srcentity = self.model.get_entity(sourcename)
                        for ea in am.source.get_expression_attributes().values():
                            components.append(dict(attribute=self.get_attribute(attributename),
                                                   sourceentity=srcentity,
                                                   sourceattribute=ea
                                                   ))
        return components
class DataVaultEntityAttributeExpression: #FIXME: still needed? cf. Mapping > AttributeMappingExpression
    """A computed attribute expression with placeholder components.

    Example definition:
        VALUE_IND:
          expression: "max(case when {1} = 'IND' then {2} end)"
          components:
            1: [thp, shortname]
            2: [tls, value]
    """

    def __init__(self, rule, expression: str = '', components: dict = None, resulttype: str = ''):
        """
        :param rule: owning rule (provides model/template access)
        :param expression: e.g. "max(case when {ph1} = 'IND' then {ph2} end)"
        :param components: dict of list, e.g. {ph1: [tablealias, column], ...}
        :param resulttype: datatype of the expression result

        ``components`` defaults to None instead of ``{}``: a mutable default
        argument would be shared across all instances (classic pitfall).
        """
        self._expression = expression
        self.components = {} if components is None else components
        self._resulttype = resulttype
        self._rule = rule

    @property
    def expression(self) -> str:
        """The raw, unparsed expression string."""
        return self._expression

    def get_components(self) -> dict:
        """Mapping of placeholder -> [tablealias, column]."""
        return self.components

    @property
    def datatype(self) -> str:
        """Datatype of the expression result."""
        return self._resulttype

    def get_parsed_expression(self) -> str:
        """Replace every ``{placeholder}`` with the rendered component reference."""
        parsed_result = self.expression
        template = self._rule.entity.model.basetemplates.get('attribute_expression')
        for placeholder, component in self.get_components().items():
            parsed_result = parsed_result.replace('{' + str(placeholder) + '}', template.render(component=component))
        return parsed_result
class DataVaultEntityRule:
    """A named derivation rule on an entity, wrapping an attribute expression."""

    def __init__(self, entity, name: str, definition: dict = None):
        """
        :param entity: owning entity
        :param name: rule name
        :param definition: dict with 'expression', 'attributes', 'resulttype'
        """
        self.entity = entity
        self.name = name
        raw_expression = definition.get('expression')
        raw_components = definition.get('attributes')
        raw_resulttype = definition.get('resulttype')
        self.expression = DataVaultEntityAttributeExpression(self,
                                                             raw_expression,
                                                             raw_components,
                                                             raw_resulttype)
class MappingSource:
    """Mixin for entities acting as a mapping source (deliveries, composites):
    resolves target entities and hash-key components from the model's
    mapping definitions."""

    def __init__(self, model, entity):
        self.model = model
        self.entity = entity
        self.name = entity.name

    def get_target_entity_names(self, active_only: Boolean = False) -> list:
        """Names of directly mapped target entities.

        NOTE(review): both branches append the same value, so ``active_only``
        currently has no effect — compare the filtering in
        ``DataVaultEntity.get_source_entities``; probably the else should be
        ``elif not active_only``.
        """
        entities = []
        if self.name in self.model.mappings:
            for m in self.model.mappings.get(self.name).get_targetmappings().values():
                if active_only and m.type != 'mappingonly':
                    entities.append(m.targetentityname)
                else:
                    entities.append(m.targetentityname)
        return entities

    def get_target_entities(self) -> list:
        """returns list of direct mapped entities"""
        if self.name in self.model.mappings:
            return self.model.mappings.get(self.name).get_target_entities()
        return []

    def get_target_entity_hash_components(self, target) -> list:
        """Get the components for a target-entities hashkey. Since there a different naming for each source, use the attribute names of the source"""
        hashcomponents = []
        attributemappings = self.get_attribute_mappings_by_target(target.name) # stg -> a single entity
        for am in attributemappings:
            hashcomponents.append({'sourceexpression': am.source,
                                   'targetattribute': am.target
                                   })
        # FIXME: the bkcc attribute should always come first.
        return hashcomponents

    def get_target_entities_hash_components(self) -> dict:
        """Hash-key components for every directly mapped target, keyed by the
        name of the hash attribute."""
        hash_keys = {}
        targets = self.get_target_entities() # Direct mapped Targets
        for target in targets:
            if target.type in ["hub", "reference"]: #TODO: reference is special here: its key is not hashed, but is exactly therefore "vulnerable" to renames between source and target.
                hash_keys[target.key_attribute.name] = dict(hashattribute=target.key_attribute,
                                                            components=self.get_target_entity_hash_components(target),
                                                            targetentity=target)
            if target.type in ["link"]:
                # For the link: attributes from the mapping + linked-entity attributes from the mapping
                linkhashcomponents = []
                for le in target.get_linked_entities():
                    linkhashcomponents.extend(self.get_target_entity_hash_components(le))
                linkhashcomponents.extend(self.get_target_entity_hash_components(target))
                hash_keys[target.key_attribute.name] = dict(hashattribute=target.key_attribute,
                                                            components=linkhashcomponents,
                                                            targetentity=target)
            if target.type in ["satellite"]:
                hash_keys[target.hashdiff_fk_attribute.name] = dict(hashattribute=target.hashdiff_fk_attribute,
                                                                    components=self.get_target_entity_hash_components(
                                                                        target),
                                                                    targetentity=target)
                # If the referenced hub/link key is not in the list yet
                if target.get_parent_key_attribute().name not in hash_keys:
                    hash_keys[target.get_parent_key_attribute().name] = {
                        "hashattribute": target.get_parent_key_attribute(), "components": []}
        return hash_keys

    def get_target_entities_hash_attributes(self) -> dict:
        """Hash attributes of every directly mapped target, keyed by name."""
        attributes = dict()
        targets = self.get_target_entities()
        for target in targets:
            if target.type in ["hub", "link"]:
                attributes[target.key_attribute.name] = target.key_attribute
            # if target.type in ["link"]: #BUG: this mechanism makes the order deviate from the mapping.
            #     for le in target.getLinkedEntities():
            #         attributes[le.key_attribute.name] = le.key_attribute
            if target.type in ["satellite"]:
                attributes[target.hashdiff_fk_attribute.name] = target.hashdiff_fk_attribute
                # If the referenced hub/link key is not in the list yet
                if target.get_parent_key_attribute().name not in attributes:
                    attributes[target.get_parent_key_attribute().name] = target.get_parent_key_attribute()
        return attributes

    def get_mappings(self) -> dict:
        """return a dict of mappings, specified for the entity."""
        return self.model.get_mapping(self.name)

    def get_attribute_mappings_by_target(self, target_entity_name: str) -> dict:
        """return a dict of source to target attribute-mappings """
        return self.get_mappings().get_attribute_mappings_by_target(target_entity_name)
class DynamicProperties:
    """Ad-hoc attribute bag: unknown attributes resolve to None instead of raising."""

    @classmethod
    def from_kwargs(cls, **kwargs):
        """Build an instance whose attributes mirror the given keyword arguments."""
        instance = cls()
        for name, value in kwargs.items():
            setattr(instance, name, value)
        return instance

    def __getattr__(self, attr):
        # Only called when normal attribute lookup fails: treat as unset.
        return None
#TODO: New Feature: Option zum Exportieren einer Objektinstanz nach yaml
+105
View File
@@ -0,0 +1,105 @@
import logging
from pathlib import Path
import sys
import yaml
class ConfigDict(dict):
    """Dictionary whose keys are also reachable as attributes.

    ``cfg.key`` reads ``cfg['key']`` and ``cfg.key = v`` writes ``cfg['key']``.
    Missing keys raise ``KeyError`` (not ``AttributeError``), mirroring item
    access — callers relying on that are unaffected.
    """

    def __init__(self, initialvalues: dict = None):
        """Initialize from *initialvalues*; None (the default) means empty.

        A ``None`` sentinel replaces the original mutable ``{}`` default,
        which is shared across calls (classic Python pitfall).
        """
        super().__init__()
        if initialvalues:
            self.update(initialvalues)

    def __getattr__(self, item):
        # Attribute read falls through to item lookup.
        return super().__getitem__(item)

    def __setattr__(self, item, value):
        # Attribute write stores an item instead of an instance attribute.
        return super().__setitem__(item, value)

    def __dir__(self):
        # Expose keys for interactive tab-completion.
        return super().__dir__() + [str(k) for k in self.keys()]
class Config(ConfigDict):
    """Application configuration, loaded and validated from a YAML file.

    All sections are pre-created as ConfigDicts so they can be accessed as
    attributes (e.g. ``config.paths.output``) before ``load`` runs.
    """

    def __dir__(self):
        return super().__dir__() + [str(k) for k in self.keys()]

    def __init__(self):
        # Pre-create every known configuration section.
        self['model'] = ConfigDict()
        self['paths'] = ConfigDict()
        self['datavault'] = ConfigDict()
        self['generator'] = ConfigDict()
        self['layer'] = ConfigDict()
        self['vars'] = ConfigDict()
        self['pre_hooks'] = ConfigDict()
        self['post_hooks'] = ConfigDict()
        self['commonattributes'] = ConfigDict()
        self['entitydefaults'] = ConfigDict()
        self['basetemplates'] = ConfigDict()
        self['jinja'] = ConfigDict()
        self['sys_specification'] = ConfigDict()

    def load(self, filename, schema, validation_handler):
        """Read *filename*, validate it and distribute the content into the sections.

        :param filename: path of the YAML configuration file
        :param schema: schema object handed to *validation_handler*
        :param validation_handler: callable(title, schema, content) performing validation
        Exits the process (exit code 2) on a missing file or YAML syntax error.
        """
        self.filename = filename
        self.path = Path(filename).absolute().parent
        try:
            with open(filename, 'r') as file:
                self.content = yaml.safe_load(file)
            validation_handler('Configuration: ' + filename, schema, self.content)
        except FileNotFoundError as e:
            print("")
            print(e)
            logging.error(e)
            sys.exit(2)
        except yaml.scanner.ScannerError as e:
            print("")
            logging.error(e)
            sys.exit(2)
        except yaml.parser.ParserError as e:
            print("")
            logging.error(e)
            sys.exit(2)
        # model metadata and filesystem paths
        self.model.update(dict(name = self.content.get('model').get('name')
                               , ignore_file_prefix = self.content.get('model').get('ignore_file_prefix','_')))
        self.paths.update(dict(log = self.content.get('model').get('paths').get('log')
                               , entities = self.content.get('model').get('paths').get('entities')
                               , mappings = self.content.get('model').get('paths').get('mappings')
                               , templates = self.content.get('model').get('paths').get('templates')
                               , output = self.content.get('model').get('paths').get('output')
                               ))
        self.vars.update(self.content.get('vars', {}))
        self.pre_hooks.update(self.content.get('pre_hooks', {}))
        self.post_hooks.update(self.content.get('post_hooks', {}))
        # data-vault specific settings (hashing, ghost records, constraints)
        self.datavault.update(dict(keyattribute = self.content.get('keyattribute')
                                   , zerokey = self.content.get('zerokey','')
                                   , constraints = ConfigDict(self.content.get('constraints', {}))
                                   , hash = ConfigDict(dict(algorithm = self.content.get('hash_algorithm')
                                                            ,separator = self.content.get('hash_separator')
                                                            ,case = self.content.get('hash_case')))
                                   , business_key_treatment = ConfigDict(self.content.get('business_key_treatment'))
                                   , hashdiff_attribute_treatment = ConfigDict(self.content.get('hashdiff_attribute_treatment'))
                                   , ghostrecord = ConfigDict(self.content.get('ghostrecord'))
                                   ))
        self.commonattributes.update(self.content.get('commonattributes'))
        self.basetemplates.update(self.content.get('templates'))
        self.jinja.update(dict(environment = ConfigDict(self.content.get('jinja', {}).get('environment', {}))))
        if self.content.get('sys_specification'):
            self.sys_specification.update(self.content.get('sys_specification'))
        # nested sections become attribute-accessible ConfigDicts per key
        for k,v in self.content.get('layer').items():
            self.layer[k] = ConfigDict(v)
        for k,v in self.content.get('entitydefaults').items():
            self.entitydefaults[k] = ConfigDict(v)
        for k,v in self.content.get('generator').items():
            self.generator[k] = ConfigDict(v)
+102
View File
@@ -0,0 +1,102 @@
class DagNode:
    """A single node of the load DAG, wrapping a model entity."""

    def __init__(self, name, entity):
        self.name = name         # unique node identifier (entity name)
        self.entity = entity     # the wrapped generator entity
        self.level = 0           # topological level, assigned during traversal
        self._visitedby = []     # traversal bookkeeping

    def __repr__(self):
        return f"<{self.name}>"

    # TODO: edge weights: e.g. a satellite references a hub but is not strictly
    #       dependent on the hub having been loaded before. A weight per edge
    #       from 0 (loose) to 1 (strict) could drive the level computation:
    #       stage -> sat : strict
    #       hub   -> sat : loose
    #       hub, sat -> pit : strict
class Dag:
    """Directed acyclic graph of load dependencies between model entities.

    Nodes are looked up by name; edges are ``(from_name, to_name)`` tuples.
    Traversals assign a ``level`` to each visited node for load ordering.
    """

    def __init__(self, model):
        self.model = model
        self.nodes = dict()   # name -> DagNode
        self.edges = list()   # (from_name, to_name) tuples

    def reset(self):
        """Reset every node's level to 0 before a new traversal."""
        for n in self.nodes.values():
            n.level = 0

    def add_node(self, node: "DagNode"):
        """Register *node*; a node with the same name is replaced."""
        self.nodes[node.name] = node

    def add_edge(self, edge: tuple):
        """Register a directed edge (from_name, to_name)."""
        self.edges.append(edge)

    def get_node(self, name):
        """Return the node with *name*, or None if unknown."""
        return self.nodes.get(name)

    def get_successors(self, nodename):
        """All nodes *nodename* has an outgoing edge to."""
        return [self.nodes.get(e[1]) for e in self.edges if e[0] == nodename]

    def get_predecessor(self, nodename):
        """All nodes with an edge pointing at *nodename*."""
        return [self.nodes.get(e[0]) for e in self.edges if e[1] == nodename]

    def get_roots(self):
        """Nodes without predecessors (the DAG's entry points)."""
        return [n for n in self.nodes.values() if len(self.get_predecessor(n.name)) == 0]

    def get_forward_tree(self, node: "DagNode", excludes=None, level=1, result=None):
        """Depth-first walk downstream from *node*.

        Assigns strictly increasing levels along each path and returns the
        visited nodes (each once, in first-visit order). Nodes named in
        *excludes* are skipped together with their subtree.
        """
        if result is None:
            result = []
        if excludes is None:
            excludes = []
        if node.name not in excludes:
            node.level = level
            if node not in result:
                result.append(node)
            for sn in self.get_successors(node.name):
                # only push a successor deeper if it is not already below us
                if sn.level <= level:
                    sn.level = level + 1
                    result = self.get_forward_tree(sn, excludes, sn.level, result)
        return result

    def get_backward_tree(self, node: "DagNode", excludes=None, level=0, result=None):
        """Depth-first walk upstream from *node*, assigning decreasing
        (non-positive) levels; mirror image of get_forward_tree."""
        if result is None:
            result = []
        if excludes is None:
            excludes = []
        if node.name not in excludes:
            node.level = level
            if node not in result:
                result.append(node)
            for sn in self.get_predecessor(node.name):
                if sn.level >= level:
                    sn.level = level - 1
                    result = self.get_backward_tree(sn, excludes, sn.level, result)
        return result

    def num_level(self, nodes: list) -> int:
        """Number of distinct levels present in *nodes*."""
        # set comprehension instead of len(set([...])) — same result, no throwaway list
        return len({n.level for n in nodes})

    def reverse_level(self, nodes: list) -> list:
        """Shift all levels up by the level count (maps the non-positive
        backward-tree levels into a positive range). Mutates the nodes in
        place and returns the same list."""
        num_level = self.num_level(nodes)
        for n in nodes:
            n.level = num_level + n.level
        return nodes

# def get_fastest_path(self):
#     returns the fastest path to each target.
#     Priority: fastest load of source to target.
+82
View File
@@ -0,0 +1,82 @@
import re
from DataVaultGenerator.Components import DataVaultEntity, DataVaultEntityAttribute, ErrorCollection
class Bridge(DataVaultEntity):
    """Bridge entity: materializes a snapshot query over hubs/links."""

    def __init__(self, model, filename, definition: dict = None):
        DataVaultEntity.__init__(self, model, filename, definition)
        # update strategy; defaults to a full rebuild
        self.updatemode = self._definition.get('updatemode', 'full')

    @property
    def snapshotattribute(self):
        """The snapshot-date attribute built from the (required) definition."""
        return DataVaultEntityAttribute(self, self._definition['snapshotattribute'])

    @property
    def snapshotquery(self, include_db=True):
        """The snapshot query with {entity:alias} placeholders replaced by
        rendered, aliased table references.

        NOTE(review): a property getter is never invoked with arguments, so
        ``include_db`` is effectively always True here — consider turning this
        into a regular method.
        """
        parsed_result = self.rawquery
        for alias, entity in self.get_query_entities().items():
            if entity:
                replacement = self.model.basetemplates.get('query_entity_alias').render(entity=entity, includeDB=include_db, alias=str(alias))
                parsed_result = parsed_result.replace('{' + str(entity.name) + ':' + str(alias) + '}', replacement)
                parsed_result = parsed_result.replace('{' + str(entity.name) + '}', replacement)
        return parsed_result

    def get_query_entities(self):
        """ Parses Querystrings like: Select * from {entityname1:alias1} join {entityname2:alias2} and returns a list
        of entity instances. """
        # first pass: aliased placeholders {entity:alias}
        regex = r"\{(.*?):(.*?)?\}"
        entities = {}
        matches = re.finditer(regex, self.rawquery, re.MULTILINE)
        for matchNum, match in enumerate(matches):
            # NOTE(review): the inner loop repeats the same assignment once per
            # regex group; it looks carried over from regex101 sample code.
            for groupNum in range(0, len(match.groups())):
                entities[match.group(2)] = self.model.get_entity(match.group(1))
        #regex101.com
        # second pass: placeholders without an alias, e.g. {entityname}
        regex = r"\{(.[^:]*?)\}"
        matches = re.finditer(regex, self.rawquery, re.MULTILINE)
        for matchNum, match in enumerate(matches):
            for groupNum in range(0, len(match.groups())):
                entities[match.group(1)] = self.model.get_entity(match.group(1))
        return entities

    @property
    def rawquery(self):
        """The unparsed snapshot query string from the definition."""
        return self._definition.get('snapshotquery', '')

    def get_linked_entities(self): #FIXME: add to the docs? relevant at all?
        """returns a list of linked entities."""
        return [self.model.get_entity(le) for le in self._definition['hubs'] + self._definition.get('links',[])]

    def has_attributes(self):
        """True if bridge-specific attributes are defined."""
        return True if self._definition.get('bridgeattributes') else False

    def get_bridgeattributes(self):
        """Bridge-specific attributes built from the definition."""
        return [DataVaultEntityAttribute(self, attrdef) for attrdef in self._definition.get('bridgeattributes', [])]

    def validate(self):
        """Validate attribute datatypes and that all query entities exist."""
        errors = ErrorCollection()
        for attr in self.attributes.values():
            spec = self.layer.sys_specification
            errors.append(attr.validate(spec))
        # Validating entity references:
        if self._definition.get('snapshotquery'):
            for name, e in self.get_query_entities().items():
                if e is None:
                    errors.add("VALIDATION ERROR",
                               (self.filename, "Bridge", "<" + self.name + ">"),
                               f'query-entity <{name}> not found.')
        return errors

    def get_component_entities(self):
        """Dependency records for every query entity (excluding self)."""
        return [{'entity': self, 'component': c, 'type': c.type} for c in self.get_query_entities().values() if self != c ]
+30
View File
@@ -0,0 +1,30 @@
from DataVaultGenerator.Components import DataVaultEntity, ErrorCollection, MappingSource
class Composite(DataVaultEntity, MappingSource):
    """Composite entity defined by a free-form query over other entities;
    also usable as a mapping source."""

    def __init__(self, model, filename, definition: dict = None):
        DataVaultEntity.__init__(self, model, filename, definition)
        MappingSource.__init__(self, model, self)

    @property
    def rawquery(self):
        """The unparsed query string from the definition."""
        return self._definition.get('query', '')

    @property
    def query(self):
        """The query with entity placeholders resolved by the model."""
        return self.model.get_parsed_query(self, self.rawquery)

    def get_query_entities(self):
        """Entities referenced via placeholders in the raw query."""
        return self.model.get_query_entities(self.rawquery)

    def get_component_entities(self):
        """Dependency records for every entity referenced by the query."""
        referenced = self.get_query_entities().values()
        return [{'entity': self, 'component': c, 'type': c.type} for c in referenced]

    def validate(self):
        """Validate all attribute datatypes against the layer specification."""
        errors = ErrorCollection()
        for attribute in self.attributes.values():
            errors.append(attribute.validate(self.layer.sys_specification))
        return errors
+89
View File
@@ -0,0 +1,89 @@
from DataVaultGenerator.Components import DataVaultEntity, MappingSource, ErrorCollection
class Delivery(DataVaultEntity, MappingSource):
    """A source delivery (staging) entity; also acts as a mapping source."""

    def __init__(self, model, filename, definition: dict = None):
        DataVaultEntity.__init__(self, model, filename, definition)
        MappingSource.__init__(self, model, self)
        self.properties = definition.get('properties', {})

    @property
    def delta_attribute(self):
        """Attribute used as delta watermark (None if not configured/found)."""
        return self.get_attribute(self._definition.get('deltaattribute'))

    @property
    def delta_initialvalue(self):
        """Initial delta watermark value."""
        return self._definition.get('deltainitialvalue')

    @property
    def recordsource(self):
        """Record-source value for this delivery ('' if unset)."""
        return self._definition.get('recordsource', '')

    @property
    def batchmode(self):
        return self._definition.get('batchmode', 'single') # multi, single

    @property
    def deliverymode(self):
        return 'delta' if self._definition.get('deltaattribute') else 'full' # 'delta' when a deltaattribute is configured, else 'full'

    @property
    def interfaces(self):
        """Interface entities referenced by the (required) 'interfaces' list."""
        return [self.model.get_interface(i) for i in self._definition['interfaces']]

    @property
    def ldts_source(self):
        """Attribute supplying the load timestamp, if overridden."""
        return self.get_attribute(self._definition.get('ldts_source'))

    @property
    def overwrite_ldts(self):
        """True if the load timestamp comes from a source attribute."""
        return True if self._definition.get('ldts_source') else False

    @property
    def query(self):
        """Optional custom extraction query ('' if unset)."""
        return self._definition.get('query', '')

    @property
    def source_system(self):
        """Explicitly configured source system, else that of the first interface."""
        if self._definition.get('sourcesystem'):
            return self.model.get_source_system(self._definition.get('sourcesystem'))
        else:
            return self.interfaces[0].source_system

    @property
    def source_type(self):
        # NOTE(review): the fallback self.interfaces[0].source_type is evaluated
        # eagerly even when 'sourcetype' is set (dict.get evaluates its default).
        return self._definition.get('sourcetype',self.interfaces[0].source_type)

    def get_component_entities(self):
        """Dependency records: this delivery depends on its interfaces."""
        return [{'entity': self, 'component': c, 'type': c.type} for c in
                self.interfaces]

    def validate(self):
        """Validate attribute datatypes and the referenced interfaces/attributes."""
        errors = ErrorCollection()
        for attr in self.attributes.values():
            spec = self.layer.sys_specification
            errors.append(attr.validate(spec))
        # Validating entity references:
        for i in self._definition['interfaces']:
            if self.model.get_interface(i) is None:
                errors.add("VALIDATION ERROR",
                           (self.filename, "Delivery", "<" + self.name + ">"),
                           f'Interface <{i}> not found')
        if self._definition.get('deltaattribute'):
            if self.delta_attribute is None:
                errors.add("VALIDATION ERROR",
                           (self.filename, "Delivery", "<" + self.name + ">"),
                           f'Deltaattribute <{self._definition.get("deltaattribute")}> not found in attributes.')
        if self._definition.get('ldts_source'):
            if self.ldts_source is None:
                errors.add("VALIDATION ERROR",
                           (self.filename, "Delivery", "<" + self.name + ">"),
                           f'ldts_source <{self._definition.get("ldts_source")}> not found in attributes.')
        return errors
@@ -0,0 +1,22 @@
from DataVaultGenerator.Components import DataVaultEntity, ErrorCollection
class GenericTable(DataVaultEntity):
    """A plain (non-vault) table entity; its components are the generic
    transformations that target it."""

    def __init__(self, model, filename, definition: dict = None):
        DataVaultEntity.__init__(self, model, filename, definition)

    def get_component_entities(self):
        """Dependency records: every generictransformation having this table
        among its targets."""
        #FIXME: extend with GenericTask
        transformations = self.model.get_entities_by_type('generictransformation')
        return [
            {'entity': self, 'component': t, 'type': t.type}
            for t in transformations
            if self in t.get_target_entities()
        ]

    def validate(self):
        """Validate all attribute datatypes against the layer specification."""
        errors = ErrorCollection()
        for attribute in self.attributes.values():
            errors.append(attribute.validate(self.layer.sys_specification))
        return errors
@@ -0,0 +1,45 @@
from DataVaultGenerator.Components import ErrorCollection, GeneratorEntity, DBEntity
class GenericTask(GeneratorEntity):
    """A free-form task entity linking arbitrary source entities to targets."""

    def __init__(self, model, filename, definition: dict = None):
        GeneratorEntity.__init__(self, model, filename, definition)
        self._layername = definition.get('layer', self.model.config.entitydefaults[self.type]['layer'])

    @property
    def layer(self):
        """Returns the entity layer."""
        return self.model.get_layer(self._layername)

    def validate(self):
        """Check that every referenced source/target entity exists in the model.

        Uses ``.get(..., [])`` so a definition without 'sources'/'targets'
        yields no errors instead of crashing with "NoneType is not iterable".
        """
        errors = ErrorCollection()
        # Validating entity references:
        for e in self._definition.get('sources', []):
            if self.model.get_entity(e) is None:
                errors.add("VALIDATION ERROR",
                           (self.filename, "Generic Task", "<" + self.name + ">"),
                           f'source <{e}> not found')
        for e in self._definition.get('targets', []):
            if self.model.get_entity(e) is None:
                errors.add("VALIDATION ERROR",
                           (self.filename, "Generic Task", "<" + self.name + ">"),
                           f'target <{e}> not found')
        return errors

    def get_source_entities(self):
        """returns a list of linked source entities."""
        return [self.model.get_entity(e) for e in self._definition['sources']]

    def get_target_entities(self):
        """returns a list of linked target entities."""
        return [self.model.get_entity(e) for e in self._definition['targets']]

    def get_component_entities(self):
        """Dependency records: this task depends on all of its source entities."""
        return [{'entity': self, 'component': c, 'type': c.type} for c in self.get_source_entities()]
@@ -0,0 +1,77 @@
from DataVaultGenerator.Components import ErrorCollection, GeneratorEntity, DBEntity
class GenericTransformation(GeneratorEntity):
    """A query-based transformation reading from source entities and writing to targets.

    Sources, targets and the query's placeholder entities are referenced by
    name and resolved against the model.
    """

    def __init__(self, model, filename, definition: dict = None):
        GeneratorEntity.__init__(self, model, filename, definition)
        # Fall back to the configured default layer for this entity type.
        self._layername = definition.get('layer', self.model.config.entitydefaults[self.type]['layer'])

    @property
    def dbentity(self):
        """Database object descriptor for this transformation."""
        return DBEntity(self.name,
                        self,
                        self.model.config.layer.get(self._layername).get('defaultdatabaseobject'),
                        self._definition.get('dbentity'))

    @property
    def layer(self):
        """Returns the entity layer."""
        return self.model.get_layer(self._layername)

    def validate(self):
        """Check source/target references and query-entity placeholders.

        Returns:
            ErrorCollection: one entry per unresolved or inconsistent reference.
        """
        errors = ErrorCollection()
        path = (self.filename, "Generic Transformation", "<" + self.name + ">")
        # Empty defaults prevent a TypeError when 'sources'/'targets' is absent
        # (bug fix: .get() without a default could return None, breaking both
        # the iteration and the 'in' membership tests below).
        sources = self._definition.get('sources', [])
        targets = self._definition.get('targets', [])
        for e in sources:
            if self.model.get_entity(e) is None:
                errors.add("VALIDATION ERROR", path,
                           f'source <{e}> not found')
        for e in targets:
            if self.model.get_entity(e) is None:
                errors.add("VALIDATION ERROR", path,
                           f'target <{e}> not found')
        # Parse the query once instead of twice; error order is unchanged.
        query_entities = self.get_query_entities()
        for name, e in query_entities.items():
            if e is None:
                errors.add("VALIDATION ERROR", path,
                           f'query-entity <{name}> not found.')
        for name, e in query_entities.items():
            if name not in sources and name not in targets:
                errors.add("VALIDATION ERROR", path,
                           f'query-entity <{name}> not specified as source or target.')
        return errors

    def get_source_entities(self):
        """returns a list of linked source entities."""
        return [self.model.get_entity(h) for h in self._definition.get('sources', [])]

    def get_target_entities(self):
        """returns a list of linked target entities."""
        return [self.model.get_entity(h) for h in self._definition.get('targets', [])]

    @property
    def query(self):
        """The parsed query with entity placeholders resolved."""
        return self.model.get_parsed_query(self, self.rawquery)

    def get_query_entities(self):
        """Entities referenced by the raw query, keyed by placeholder name."""
        return self.model.get_query_entities(self.rawquery)

    @property
    def rawquery(self):
        """Unparsed query text from the definition (empty when not defined)."""
        return self._definition.get('query', '')

    def get_component_entities(self):
        """Component descriptors (one per source entity) for dependency tracking."""
        return [{'entity': self, 'component': c, 'type': c.type} for c in self.get_source_entities()]

    def get_attributes(self, roles=(), exclude=()):
        """Transformations expose no attributes of their own."""
        return []
+84
View File
@@ -0,0 +1,84 @@
from DataVaultGenerator.Components import DataVaultEntity, DataVaultEntityAttribute, ErrorCollection
class Hub(DataVaultEntity):
    """A Data Vault hub: business keys plus a generated key attribute.

    A hub may be declared as a role ("roleof") of another hub, in which case
    its own generation flag is cleared.
    """

    def __init__(self, model, filename, definition: dict = None):
        DataVaultEntity.__init__(self, model, filename, definition)
        # NOTE(review): the definition key 'caseSesitive' looks like a typo of
        # 'caseSensitive'; existing definition files may rely on the misspelled
        # key, so it is kept as-is - confirm before renaming.
        self.isCaseSensitive = definition.get('caseSesitive', False)
        self.role_of = definition.get('roleof')  # name of the base hub, if this is a role hub
        self.bkcc_attribute = definition.get('bkcc_attribute')
        if self.role_of:
            # Role hubs are not generated as objects of their own.
            self.generate = 0
        # Add the configured key attribute under the templated column name.
        key = DataVaultEntityAttribute(self, self.model.config.datavault.keyattribute)
        key.name = self.key_columnname
        self.add_attribute(key)

    @property
    def key_columnname(self):
        """returns name of the primary Key Attribute. If no name was defined in its definition, a template applies."""
        return self._definition.get('key', self.model.basetemplates.get('entity_key_name').render(entity=self))

    @property
    def hash_attribute_trim(self):
        # Per-hub 'key_treatment.trim' override, falling back to the global
        # business_key_treatment default from the datavault config.
        return self._definition.get('key_treatment'
                                    , self.model.config.datavault.business_key_treatment)\
            .get('trim',
                 self.model.config.datavault.business_key_treatment.trim)

    @property
    def hash_attribute_case(self):
        # Per-hub 'key_treatment.case' override, falling back to the global default.
        return self._definition.get('key_treatment'
                                    , self.model.config.datavault.business_key_treatment)\
            .get('case',
                 self.model.config.datavault.business_key_treatment.case)

    @property
    def key_attribute(self):
        """The key attribute instance (role taken from config, default 'sk')."""
        return self.get_role_attribute(self.model.config.datavault.keyattribute.get('role','sk'))

    def get_satellites(self):
        """All satellites whose parent entity is this hub."""
        return [e for e in self.model.get_entities_by_type('satellite') if e.get_parent_entity() == self]

    def get_component_entities(self):
        """Component descriptors: mapped sources plus, for role hubs, the base hub."""
        c = [{'entity': self, 'component': c, 'type': c.type} for c in
             self.get_source_entities().values()]  # currently only fetches the deliveries via the mappings
        if self.role_of:
            c.extend(
                [{'entity': self, 'component': self.model.get_entity(self.role_of), 'type': 'hub'}]
            )
        return c

    def validate(self):
        """Validate attribute specs, the role-of reference, and configured
        business-key datatype constraints. Returns an ErrorCollection."""
        errors = ErrorCollection()
        for attr in self.attributes.values():
            spec = self.layer.sys_specification
            errors.append(attr.validate(spec))
        # Validating entity references:
        # role-of-reference
        if self.role_of and self.model.get_entity(self.role_of) is None:
            errors.add("VALIDATION ERROR",
                       (self.filename,"Hub", "<" + self.name + ">"),
                       f'role-of Hub <{self.role_of}> not found.')
        # constraints: optionally restrict business-key attributes to configured datatypes
        enforce_bk_type = self.model.config.datavault.constraints.get('enforce_bk_type')
        if enforce_bk_type:
            for attr in self.get_attributes('base'):
                if attr.native_datatype not in enforce_bk_type:
                    errors.add("VALIDATION ERROR",
                               (self.filename,"Hub", "<" + self.name + ">"),
                               f'Datatype of attribute <{attr.name}> not valid (enforced: {enforce_bk_type})')
        return errors

    def get_roles(self):
        """returns a list of hubs with this hub as role_of-target"""
        return [e for e in self.model.get_entities_by_type('hub', generatable_only=False) if e.role_of == self.name ]
+40
View File
@@ -0,0 +1,40 @@
from DataVaultGenerator.Components import DataVaultEntity, DynamicProperties, ErrorCollection
class Interface(DataVaultEntity):
    """A delivery interface belonging to a source system."""

    def __init__(self, model, filename, definition: dict = None):
        super().__init__(model, filename, definition)
        self.properties = definition.get('properties', {})
        # Same properties exposed with attribute-style access.
        self.prop = DynamicProperties.from_kwargs(**self.properties)

    @property
    def source_type(self):
        """Raw 'sourcetype' value from the definition."""
        return self._definition.get('sourcetype')

    @property
    def source_system(self):
        """The resolved source-system instance this interface belongs to."""
        return self.model.get_source_system(self._definition.get('sourcesystem'))

    def get_component_entities(self):
        """Return the owning source system as the single component."""
        system = self.source_system
        return [{'entity': self, 'component': system, 'type': system.type}]

    def validate(self):
        """Validate the source-system reference and all attribute specs."""
        errors = ErrorCollection()
        system_name = self._definition.get('sourcesystem')
        if self.model.get_source_system(system_name) is None:
            errors.add("VALIDATION ERROR",
                       (self.filename, "Interface", "<" + self.name + ">"),
                       f'Sourcesystem <{system_name}> not found')
        for attr in self.attributes.values():
            errors.append(attr.validate(self.source_system.sys_specification))
        return errors
+79
View File
@@ -0,0 +1,79 @@
from DataVaultGenerator.Components import DataVaultEntity, DataVaultEntityAttribute, ErrorCollection
class Link(DataVaultEntity):
    """A Data Vault link connecting hubs (and optionally other links)."""

    def __init__(self, model, filename, definition: dict = None):
        DataVaultEntity.__init__(self, model, filename, definition)
        self.drivingkeys = self._definition.get('drivingkeys',[])  # entity names forming the driving key
        # Add the configured key attribute under the templated column name.
        key = DataVaultEntityAttribute(self, self.model.config.datavault.keyattribute)
        key.name = self.key_columnname
        self.add_attribute(key)

    def get_drivingkey_entities(self):
        """Resolve the driving-key entity names to entity instances."""
        return [self.model.get_entity(d) for d in self.drivingkeys]

    def get_foreign_attribute(self, name: str):
        """Return the key attribute of the connected hub whose key name matches; None otherwise."""
        for e in self._definition['hubs']:
            if self.model.get_entity(e).key_attribute.name == name:
                return self.model.get_entity(e).key_attribute

    def get_foreign_attributes(self):
        """Build 'fk'-role attribute instances mirroring every connected hub's key."""
        fa = []
        for e in self._definition['hubs']:
            fa.append(DataVaultEntityAttribute(self, definition=dict(name=self.model.get_entity(e).key_attribute.name,
                                                                     datatype=self.model.get_entity(e).key_attribute.datatype,
                                                                     role='fk')
                                               ))
        return fa

    @property
    def key_columnname(self):
        """returns name of the primary Key Attribute. If no name was defined in its definition, a template applies."""
        return self._definition.get('key', self.model.basetemplates.get('entity_key_name').render(entity=self))

    @property
    def key_attribute(self):
        """The key attribute instance (role taken from config, default 'sk')."""
        return self.get_role_attribute(self.model.config.datavault.keyattribute.get('role','sk'))

    def get_linked_entities(self):
        """returns a list of linked entities."""
        return [self.model.get_entity(le) for le in self._definition['hubs'] + self._definition.get('links',[])]

    def get_satellites(self):
        """All satellites whose parent entity is this link."""
        return [e for e in self.model.get_entities_by_type('satellite') if e.get_parent_entity() == self]

    def validate(self):
        """Validate attribute specs and all hub/link references, adding a
        name suggestion for unresolved references when available."""
        errors = ErrorCollection()
        for attr in self.attributes.values():
            spec = self.layer.sys_specification
            errors.append(attr.validate(spec))
        # Validating entity references:
        for name in self._definition['hubs']:
            if self.model.get_entity(name) is None:
                suggest = self.model.get_entity_name_suggestion('hub', name)
                suggest = f'Do you mean <{suggest}>?' if suggest else ''
                errors.add("VALIDATION ERROR",
                           (self.filename,"Link", "<" + self.name + ">"),
                           f'Hub <{name}> not found. ' + suggest)
        # Validating entity references:
        for name in self._definition.get('links',[]):
            if self.model.get_entity(name) is None:
                suggest = self.model.get_entity_name_suggestion('link', name)
                suggest = f'Do you mean <{suggest}>?' if suggest else ''
                errors.add("VALIDATION ERROR",
                           (self.filename,"Link", "<" + self.name + ">"),
                           f'Link <{name}> not found. ' + suggest)
        return errors

    def get_component_entities(self):
        """Component descriptors from mapped sources."""
        return [{'entity': self, 'component': c, 'type': c.type} for c in
                self.get_source_entities().values()]  # currently only fetches the deliveries via the mappings
+88
View File
@@ -0,0 +1,88 @@
from DataVaultGenerator.Components import DataVaultEntity, DataVaultEntityAttribute, ErrorCollection
class PIT(DataVaultEntity):
    """A Point-In-Time table over a base entity and a set of satellites."""

    def __init__(self, model, filename, definition: dict = None):
        DataVaultEntity.__init__(self, model, filename, definition)
        self.baseentity = self._definition.get('baseentity')  # name of the base entity
        self.snapshotmode = self._definition.get('snapshotmode')
        self.snapshottable = self._definition.get('snapshottable')
        self.snapshottableattribute = self._definition.get('snapshottableattribute')

    def include_ledts(self):
        """Raw 'include_ledts' flag from the definition (may be None)."""
        return self._definition.get('include_ledts')

    @property
    def snapshotattribute(self):
        """The snapshot attribute instance built from its definition."""
        return DataVaultEntityAttribute(self, self._definition['snapshotattribute'])

    @property
    def snapshotquery(self):
        """The parsed snapshot query with entity placeholders resolved."""
        return self.model.get_parsed_query(self, self.rawsnapshotquery)

    def get_snaphotquery_entities(self):
        # NOTE: method name keeps the original (misspelled) spelling for caller compatibility.
        return self.model.get_query_entities(self.rawsnapshotquery)

    @property
    def rawsnapshotquery(self):
        """Unparsed snapshot query text (empty when not defined)."""
        return self._definition.get('snapshotquery', '')

    @property
    def query(self):
        """The parsed main query with entity placeholders resolved.

        Bug fix: this previously parsed ``self.query`` (infinite recursion);
        it must parse the raw query text, as ``snapshotquery`` does.
        """
        return self.model.get_parsed_query(self, self.rawquery)

    def get_query_entities(self):
        """Entities referenced by the raw query, keyed by placeholder name."""
        return self.model.get_query_entities(self.rawquery)

    @property
    def rawquery(self):
        """Unparsed main query text (empty when not defined)."""
        return self._definition.get('query', '')

    def get_base_entity(self):
        """Resolve the base entity name to its instance."""
        return self.model.get_entity(self.baseentity)

    def get_satellites(self):
        """Resolve the configured satellite names to entity instances."""
        # Default to [] so a PIT without 'satellites' does not raise a TypeError.
        return [self.model.get_entity(sat) for sat in self._definition.get('satellites', [])]

    def has_attributes(self):
        """True when extra PIT attributes ('pitattributes') are defined."""
        return True if self._definition.get('pitattributes') else False

    def get_pitattributes(self):
        """Resolve each [entityname, attributename] pair to an attribute instance."""
        attributes = []
        for attr in self._definition.get('pitattributes', []):
            attributes.append(self.model.get_entity(attr[0]).get_attribute(attr[1]))
        return attributes

    def validate(self):
        """Validate entity placeholders of the snapshot query and the main query."""
        errors = ErrorCollection()
        # Validating entity references:
        if self._definition.get('snapshotquery'):
            for name, e in self.get_snaphotquery_entities().items():
                if e is None:
                    errors.add("VALIDATION ERROR",
                               (self.filename, "PIT", "<" + self.name + ">"),
                               f'query-entity <{name}> not found.')
        if self._definition.get('query'):
            for name, e in self.get_query_entities().items():
                if e is None:
                    errors.add("VALIDATION ERROR",
                               (self.filename, "PIT", "<" + self.name + ">"),
                               f'query-entity <{name}> not found.')
        return errors

    def get_component_entities(self):
        """Component descriptors: snapshot-query entities, query entities,
        satellites, and the base entity."""
        c = [{'entity': self, 'component': c, 'type': c.type} for c in self.get_snaphotquery_entities().values()]
        c.extend([{'entity': self, 'component': c, 'type': c.type} for c in self.get_query_entities().values()])
        c.extend([{'entity': self, 'component': c, 'type': c.type} for c in self.get_satellites()])
        c.extend([{'entity': self, 'component': self.get_base_entity(), 'type': self.get_base_entity().type}])
        return c
+39
View File
@@ -0,0 +1,39 @@
from DataVaultGenerator.Components import DataVaultEntity
#TODO: um Möglichkeit, einen Satellite dranzuhängen erweitern sowie Laden über Mapping ermöglichen
class Reference(DataVaultEntity):
    """A reference-data entity, loadable from inline data or a query.

    TODO: extend with the possibility to attach a Satellite and to load via a Mapping.
    """

    def __init__(self, model, filename, definition: dict = None):
        super().__init__(model, filename, definition)

    @property
    def key_attribute(self):
        # FIXME: a single key attribute may be insufficient for reference data.
        return self.get_role_attribute('key')

    # FIXME: analogous to the key attribute (hk), reference data would need a global
    # NOT NULL type - otherwise the PK index (which must NOT be nullable) breaks.
    # Workaround: define mandatory: true on the key attribute.

    def get_satellites(self):
        """All satellites whose parent entity is this reference entity."""
        sats = []
        for candidate in self.model.get_entities_by_type('satellite'):
            if candidate.get_parent_entity() == self:
                sats.append(candidate)
        return sats

    @property
    def data(self):
        """Inline reference data from the definition."""
        return self._definition.get('data', {})

    @property
    def query(self):
        """The parsed query with entity placeholders resolved."""
        return self.model.get_parsed_query(self, self.rawquery)

    def get_query_entities(self):
        """Entities referenced by the raw query, keyed by placeholder name."""
        return self.model.get_query_entities(self.rawquery)

    @property
    def rawquery(self):
        """Unparsed query text (empty when not defined)."""
        return self._definition.get('query', '')

    def get_component_entities(self):
        """Component descriptors: mapped sources plus transformations targeting this entity."""
        # Currently only fetches the deliveries via the mappings.
        components = [{'entity': self, 'component': src, 'type': src.type}
                      for src in self.get_source_entities().values()]
        for trans in self.model.get_entities_by_type('generictransformation'):
            if self in trans.get_target_entities():
                components.append({'entity': self, 'component': trans, 'type': trans.type})
        # FIXME: extend with generictask
        return components
+26
View File
@@ -0,0 +1,26 @@
from DataVaultGenerator.Components import ErrorCollection, GeneratorEntity
class Report(GeneratorEntity):
    """A report entity: carries no database object, attributes, or components."""

    def __init__(self, model, filename, definition: dict = None):
        super().__init__(model, filename, definition)
        # Fall back to the configured default layer for this entity type.
        self._layername = definition.get('layer', self.model.config.entitydefaults[self.type]['layer'])

    @property
    def layer(self):
        """The layer instance this report belongs to."""
        return self.model.get_layer(self._layername)

    @property
    def dbentity(self):
        """Reports have no database object."""
        return None

    def get_component_entities(self):
        """Reports have no component entities."""
        return []

    def get_attributes(self, roles=(), exclude=()):
        """Reports have no attributes."""
        return []

    def validate(self):
        """Nothing to validate; always returns an empty ErrorCollection."""
        return ErrorCollection()
+54
View File
@@ -0,0 +1,54 @@
from DataVaultGenerator.Components import DataVaultEntity, DataVaultEntityAttribute, ErrorCollection
class Satellite(DataVaultEntity):
    """A Data Vault satellite attached to a parent entity (hub, link, ...)."""

    def __init__(self, model, filename, definition: dict = None):
        DataVaultEntity.__init__(self, model, filename, definition)
        self.parent = definition.get('parent')  # name of the parent entity

    def get_foreign_attribute(self, name: str) -> DataVaultEntityAttribute:
        """Return the parent's key attribute when its name matches, else None."""
        return self.get_parent_key_attribute() if self.get_parent_key_attribute().name == name else None

    def get_parent_entity(self) -> DataVaultEntity:
        """Resolve the parent entity name to its instance."""
        return self.model.get_entity(self.parent)

    def get_parent_key_attribute(self) -> DataVaultEntityAttribute:
        """The key attribute of the parent entity."""
        return self.get_parent_entity().key_attribute

    @property
    def hashdiff_fk_attribute(self): # FIXME: make this more flexible via config/template
        # A copy of the hashdiff attribute, prefixed with the satellite name.
        return self.get_role_attribute('hashdiff').copy(self.name + "_" + self.get_role_attribute('hashdiff').name)

    @property
    def hash_attribute_trim(self):
        # Per-satellite 'hashdiff_attribute_treatment.trim' override, falling
        # back to the global default from the datavault config.
        return self._definition.get('hashdiff_attribute_treatment'
                                    , self.model.config.datavault.hashdiff_attribute_treatment)\
            .get('trim',
                 self.model.config.datavault.hashdiff_attribute_treatment.trim)

    @property
    def hash_attribute_case(self):
        # Per-satellite 'hashdiff_attribute_treatment.case' override, falling
        # back to the global default from the datavault config.
        return self._definition.get('hashdiff_attribute_treatment'
                                    , self.model.config.datavault.hashdiff_attribute_treatment)\
            .get('case',
                 self.model.config.datavault.hashdiff_attribute_treatment.case)

    def get_component_entities(self):
        """Component descriptors from mapped sources."""
        return [{'entity': self, 'component': c, 'type': c.type} for c in
                self.get_source_entities().values()]  # currently only fetches the deliveries via the mappings

    def validate(self):
        """Validate attribute specs and the parent-entity reference."""
        errors = ErrorCollection()
        for attr in self.attributes.values():
            spec = self.layer.sys_specification
            errors.append(attr.validate(spec))
        # Validating entity references:
        if self.get_parent_entity() is None:
            errors.add("VALIDATION ERROR",
                       (self.filename,"Satellite", "<" + self.name + ">"),
                       f'Parent <{self.parent}> not found')
        return errors
@@ -0,0 +1,22 @@
from DataVaultGenerator.Components import GeneratorEntity
class SourceSystem(GeneratorEntity):
    """A registered source system; interfaces reference it via 'sourcesystem'."""

    def __init__(self, model, filename, definition: dict = None):
        super().__init__(model, filename, definition)
        self.shortname = self._definition.get('shortname', self.name)
        self.sys_specification = self._definition.get('sys_specification', '')

    def get_interfaces(self):
        """All interfaces in the model that belong to this source system."""
        return [iface for iface in self.model.interfaces.values()
                if iface.source_system == self]

    def get_interface_count(self):
        """Number of interfaces belonging to this source system."""
        return len(self.get_interfaces())

    @property
    def connection_name(self):
        """Raw 'connectionname' value from the definition (empty when unset)."""
        return self._definition.get('connectionname', '')

    @property
    def sourcesystem_type(self):
        """Raw 'sourcesystemtype' value from the definition (empty when unset)."""
        return self._definition.get('sourcesystemtype', '')
+84
View File
@@ -0,0 +1,84 @@
from DataVaultGenerator.Dag import DagNode
from DataVaultGenerator.Components import ErrorCollection, GeneratorEntity
class SubDag(GeneratorEntity):
    """A named sub-graph of the model DAG, traversed from entrypoints either
    forward (downstream) or backward (upstream), with optional excludes.
    """

    def __init__(self, model, filename, definition: dict = None):
        GeneratorEntity.__init__(self, model, filename, definition)
        self.entrypoints = definition.get('entrypoints',[])  # entity names to start traversal from
        self.key = definition.get('key',definition.get('name'))
        self.excludes = definition.get('excludes',[])  # entity names excluded from traversal
        self.tree = []  # cached result of the last get_nodes() call

    def validate(self):
        """Check that every entrypoint and exclude references an existing entity."""
        errors = ErrorCollection()
        # Validating entity references:
        for ep in self.entrypoints:
            if self.model.get_entity(ep) is None:
                errors.add("VALIDATION ERROR",
                           (self.filename,"SubDag", "<" + self.name + ">"),
                           f'Entrypoint <{ep}> not found')
        for ex in self.excludes:
            if self.model.get_entity(ex) is None:
                errors.add("VALIDATION ERROR",
                           (self.filename,"SubDag", "<" + self.name + ">"),
                           f'Exclude <{ex}> not found')
        return errors

    def get_entrypoints_nodes(self):
        """DAG nodes for the configured entrypoints; all DAG roots when none are set."""
        if self.entrypoints:
            return [self.model.dag.get_node(n) for n in self.entrypoints]
        else:
            return [n for n in self.model.dag.get_roots()]

    def get_tree(self):
        """Alias for get_nodes()."""
        return self.get_nodes()

    def get_nodes(self):
        """Traverse the DAG from the entrypoints and return a deduplicated node list.

        'subtype' selects the direction ('forward' or 'backward'); any other
        value yields an empty list. Backward results are re-leveled via
        reverse_level before deduplication.
        """
        self.model.dag.reset()
        if self.subtype == 'forward':
            r = []
            for en in self.get_entrypoints_nodes():
                r.extend(self.model.dag.get_forward_tree(en,excludes=self.excludes))
            self.tree = self.dedup_tree(r)
            return self.tree
        if self.subtype == 'backward':
            r = []
            for en in self.get_entrypoints_nodes():
                # NOTE(review): excludes are not applied on backward traversal - confirm intended.
                r.extend(self.model.dag.get_backward_tree(en))
            r = self.model.dag.reverse_level(r)
            self.tree = self.dedup_tree(r)
            return self.tree
        return []

    def dedup_tree(self, tree: list):
        """Deduplicate nodes by name, keeping the occurrence with the highest level."""
        dedup = {}
        for e in tree:
            if e.name not in dedup:
                dedup[e.name] = e
            elif dedup[e.name].level < e.level: # Replace if existing elements level is lower than current elements level
                dedup[e.name] = e
        return [e for e in dedup.values()]

    def get_leveldict(self, nodes: list) -> dict:
        # returns dict. Each key represents one level. Each level contains a list of nodes.
        ld = dict()
        for n in nodes:
            if n.level not in ld:
                ld[n.level] = []
            ld[n.level].append(n)
        return ld
+200
View File
@@ -0,0 +1,200 @@
import collections
import re
from DataVaultGenerator.Components import DataVaultEntity, MappingSource, DBEntity, ErrorCollection
class ViewAttribute():
    """One attribute of a View, with optional source components and a reference."""

    def __init__(self, entity, definition):
        self.entity = entity
        self.definition = definition
        self.name = definition.get('name')
        # Resolved attribute instance on the owning view (DataVaultEntityAttribute).
        self.attribute = entity.get_attribute(self.name)
        # Source components as "alias.attributename" strings.
        self.components = definition.get('components', [])
        # Reference to another attribute as "entityname.attributename".
        self.reference = definition.get('reference', '')
        # Cardinality of the reference, e.g. "1:n" or "m:n".
        self.referencetype = definition.get('referencetype', '')
        self.order = definition.get('order')

    def get_components(self):
        """Resolve each "alias.attribute" component to its attribute instance."""
        resolved = []
        for component in self.components:
            parts = component.split('.')
            source_entity = self.entity.get_query_entity_by_alias(parts[0])
            resolved.append(source_entity.get_attribute(parts[1]))
        return resolved

    def get_referenced_attribute(self):
        """Return the referenced attribute instance, or None when no reference is set."""
        if not self.reference:
            return None
        parts = self.reference.split('.')  # reference: entityname.attributename
        return self.entity.model.get_entity(parts[0]).get_attribute(parts[1])
class View(DataVaultEntity, MappingSource):
    """A SQL view over model entities, optionally materialized into a table.

    Attributes are declared with component references into the view query's
    aliased entities (see the example definition below).
    """
    # Example definition:
    # name: customer_d
    # type: view
    # subtype: dimension, fact
    # layer: mart
    # attributes:
    # - {name: customer_id, type: 'char(40)', components: [h.customer_h_hk]}
    # - {name: cust_no, type: 'varchar(32)', components: [h.cust_no]}
    # materialize: true #default: false
    # materialization:
    #   mode: merge # merge|full
    #   target: customer_d_mat # default: name+'_mat'
    #   layer: mart # default: same as view
    #   mergekeys:
    #   - customer_id
    #   - cust_no

    def __init__(self, model, filename, definition: dict = None):
        DataVaultEntity.__init__(self, model, filename, definition)
        MappingSource.__init__(self, model, self)
        # View attributes in declaration order, keyed by attribute name.
        self._viewattributes = collections.OrderedDict()
        self.materialize = definition.get('materialize', False)
        self.materialization = definition.get('materialization', {})
        for attrdef in definition['attributes']:
            self._viewattributes[attrdef.get('name')] = ViewAttribute(self, attrdef)

    def get_viewattributes(self, roles: list = 'all', exclude: list = ()):
        """returns a list of attributes for one or more given roles. You can exclude certain attribute-roles"""
        if 'all' in roles:
            return [va for va in self._viewattributes.values() if va.attribute.role not in exclude]
        else:
            return [va for va in self._viewattributes.values() if va.attribute.role in roles and va.attribute.role not in exclude]

    def get_viewattribute(self, name):
        """The ViewAttribute with the given name, or None."""
        return self._viewattributes.get(name)

    def safe_list_get(self, l, idx, default=None):
        """Return l[idx], or default when idx is out of range."""
        try:
            return l[idx]
        except IndexError:
            return default

    @property
    def query(self):
        """The raw query with every '{entityname:alias}' placeholder replaced
        by the rendered entity reference."""
        parsed_result = self.rawquery
        for alias, entity in self.get_query_entities().items():
            if entity:
                # Qualify with the database name only for cross-database references.
                include_db = False if self.dbentity.database == entity.dbentity.database else True
                replacement = self.model.basetemplates.get('query_entity_alias').render(entity=entity, includeDB=include_db, alias=str(alias))
                parsed_result = parsed_result.replace('{' + str(entity.name) + ':' + str(alias) + '}', replacement)
        return parsed_result

    @property
    def rawquery(self):
        """Unparsed query text from the definition (empty when not defined)."""
        return self._definition.get('query', '')

    def get_query_entities(self):
        """ Parses Querystrings like: Select * from {entityname1:alias1} join {entityname2:alias2} and returns a list
        of entity instances. """
        regex = r"\{(.*?):(.*?)?\}"
        entities = {}
        matches = re.finditer(regex, self.rawquery, re.MULTILINE)
        for matchNum, match in enumerate(matches):
            # NOTE(review): the inner loop assigns the same alias->entity pair
            # once per regex group; the result is correct but the loop is redundant.
            for groupNum in range(0, len(match.groups())):
                entities[match.group(2)] = self.model.get_entity(match.group(1))
        return entities

    def get_referenced_entities(self):
        """Distinct entities referenced by any attribute's 'reference' definition."""
        ref_entities = []
        for vattr in self._viewattributes.values():
            if vattr.reference:
                e = vattr.get_referenced_attribute().entity
                if e not in ref_entities:
                    ref_entities.append(e)
        return ref_entities

    def get_query_entity_by_alias(self, alias):
        """The query entity registered under the given alias, or None."""
        return self.get_query_entities().get(alias)

    def get_component_entities(self):
        """Component descriptors for every entity used in the view query."""
        return [{'entity': self, 'component': c, 'type': c.type} for c in self.get_query_entities().values()]

    def get_component_attributes(self, attributename):
        """For one view attribute, return its component attributes with their source entities."""
        components = []  # NOTE(review): unused local, kept for byte-compatibility
        viewattribute = self.get_viewattribute(attributename)
        return [{'attribute': viewattribute.attribute,
                 'sourceentity': cattr.entity,
                 'sourceattribute': cattr} for cattr in viewattribute.get_components() ]

    @property
    def materialization_dbentity(self):
        """DB object descriptor for the materialization target table."""
        return DBEntity(self.materialization.get('target'),
                        self,
                        self.model.config.layer.get(self.materialization.get('layer', self._layername )).get(
                            'defaultdatabaseobject'),
                        None)

    @property
    def materialization_rawquery(self):
        """Unparsed materialization query text (empty when not defined)."""
        return self.materialization.get('query', '')

    @property
    def materialization_query(self):
        """The parsed materialization query."""
        return self.model.get_parsed_query(self, self.materialization_rawquery)

    def get_materialization_query_entities(self):
        """Entities referenced by the materialization query, keyed by alias."""
        return self.model.get_query_entities(self.materialization_rawquery)

    def validate(self):
        """Validate attribute specs, query-entity aliases, component references
        and attribute references. Component/reference checks are skipped when
        the query aliases themselves failed to resolve."""
        errors = ErrorCollection()
        for attr in self.attributes.values():
            spec = self.layer.sys_specification
            errors.append(attr.validate(spec))
        # Validating entity references:
        if self._definition.get('query'):
            for alias, entity in self.get_query_entities().items():
                if entity is None:
                    errors.add("VALIDATION ERROR",
                               (self.filename, "View", "<" + self.name + ">"),
                               f'Viewentity for alias <{alias}> not found.')
        # Skip next validations because of errors above:
        if errors.count > 0:
            return errors
        # Validating component references:
        viewentities = self.get_query_entities()
        for vattrname, vattr in self._viewattributes.items():
            for comp in vattr.components:
                c = comp.split('.')
                if c[0] not in viewentities.keys():
                    errors.add("VALIDATION ERROR",
                               (self.filename, "View", "<" + self.name + ">", "Attribute <" + vattrname + ">"),
                               f'components: Viewentity for alias <{c[0]}> not found.')
                elif self.get_query_entity_by_alias(c[0]).get_attribute(c[1]) is None:
                    errors.add("VALIDATION ERROR",
                               (self.filename, "View", "<" + self.name + ">", "Attribute <" + vattrname + ">"),
                               f'components: Attribute <{c[1]}> for alias <{c[0]}> not found.')
        # Validating attribute references:
        for vattrname, vattr in self._viewattributes.items():
            if vattr.reference:
                ref = vattr.reference.split('.')
                entity = self.model.get_entity(ref[0])
                if entity is None:
                    errors.add("VALIDATION ERROR",
                               (self.filename, "View", "<" + self.name + ">", "Attribute <" + vattrname + ">"),
                               f'reference: Entity <{ref[0]}> not found.')
                elif entity.get_attribute(ref[1]) is None:
                    errors.add("VALIDATION ERROR",
                               (self.filename, "View", "<" + self.name + ">", "Attribute <" + vattrname + ">"),
                               f'reference: Attribute <{ref[1]}> for entity <{ref[0]}> not found.')
        return errors
+338
View File
@@ -0,0 +1,338 @@
import logging
import re
from .Components import ErrorCollection, log
'''
targetmapping =
source: (class Mapping)
- target: costcenter_h (targetmapping)
type: mappingonly #d.h der Hub wird nicht aus dieser Tabelle beladen, aber die Satelliten und Links müssen das Mapping kennen
mappingmode: implicit|explicit
mapping:
- [creationname, creationname]
- target: costcentertype_h
type: master # Hauptquelle für hub
mapping:
- [creationname, creationname]
'''
'''
SourcesMapping
- TargetMapping 1..n
- AttributeMapping 1..n
'''
class AttributeMappingExpression:
    """A source-side mapping expression, e.g. "concat({attribute1},'-',{attribute2})".

    Curly-brace placeholders name attributes of the base entity; they are
    resolved against the entity and rendered via the 'attribute_expression'
    base template.
    """

    def __init__(self, entity, expression: str = '', resulttype: str = '', alias: str = ''):
        self._rawexpression = expression
        self._resulttype = resulttype
        self.entity = entity
        self.alias = alias

    @property
    def datatype(self):
        """Declared result datatype of the expression, e.g. 'nvarchar(100)'."""
        return self._resulttype

    @property
    def native_datatype(self):
        """Datatype name without the length part, e.g. 'nvarchar(100)' -> 'nvarchar'."""
        # IMPROVE: should really be stated explicitly in the attribute definition.
        paren = self.datatype.find('(')
        if paren == -1:
            return self.datatype
        return self.datatype[:paren].strip().lower()

    @property
    def native_datatypelength(self):
        """Length part of the datatype, e.g. 'nvarchar(100)' -> '100'; '' when absent."""
        # IMPROVE: should really be stated explicitly in the attribute definition.
        start = self.datatype.find('(')
        if start == -1:
            return ''
        return self.datatype[start + 1:self.datatype.find(')')].strip().lower()

    @property
    def expression(self):
        """The raw expression with each {placeholder} replaced by its rendered attribute."""
        template = self.entity.model.basetemplates.get('attribute_expression')
        rendered = self._rawexpression
        # IMPROVE: the attribute instances may be unnecessary here - the placeholder name should suffice.
        for placeholder, attr in self.get_expression_attributes().items():
            if attr:
                rendered = rendered.replace('{' + str(placeholder) + '}',
                                            template.render(component=[attr.name]))
        return rendered

    def get_expression_attributes(self):
        """Parse the raw expression and map each placeholder name to its attribute instance."""
        attributes = {}
        for match in re.finditer(r"\{(.*?)?\}", self._rawexpression, re.MULTILINE):
            attributes[match.group(1)] = self.entity.get_attribute(match.group(1))
        return attributes
class AttributeMapping:
    """Maps one source attribute (or expression) to one target attribute
    within a TargetMapping."""

    def __init__(self, targetmapping, source:str, target:str, transformation:str=''):
        self.targetmapping = targetmapping
        self._source = source # => 'attributename' or '{expression: "concat({attribute1},...)"}'
        self._target = target # => 'attributename'
        self.transformation = transformation  # optional named transformation

    def __repr__(self):
        return "AttributeMapping: <{0}> -> <{1}>".format(self._source, self._target)

    @property
    def source(self):
        """The source side as an AttributeMappingExpression.

        A dict source carries an explicit expression; a plain attribute name
        is wrapped in a synthetic '{name}' expression.
        """
        entity = self.targetmapping.sourceentity # IMPROVE: could be resolved in __init__ already, if that does not pre-empt validation
        if type(self._source) is dict:
            return AttributeMappingExpression(entity , expression = self._source.get('expression')
                                              , resulttype = self.target.datatype
                                              , alias = self.target.name )
        else: # only an attribute name was given - build a synthetic expression:
            attr = entity.get_attribute(self._source)
            return AttributeMappingExpression(entity , expression = '{'+ self._source+'}'
                                              , resulttype = attr.datatype
                                              , alias = attr.name )

    @property
    def target(self):
        """The resolved target attribute instance on the target entity."""
        #entity = self.targetmapping.model.get_entity(self.targetmapping.targetentityname) # IMPROVE: could be resolved in __init__ already, if that does not pre-empt validation
        return self.targetmapping.targetentity.get_attribute(self._target)

    @property
    def targetattribute_name(self): # FIXME: should be switched over to target.name
        return self._target

    @property
    def transformation_name(self):
        """Name of the optional transformation applied to this mapping."""
        return self.transformation

    def validate(self):
        """Check that the target attribute and all source attributes exist."""
        errors = ErrorCollection()
        if self.targetmapping.targetentity.get_attribute(self._target) is None:
            errors.add("VALIDATION ERROR",
                       ("Mapping", "<" + self.targetmapping.sourceentityname + ">","target <"+self.targetmapping.targetentityname+">"),
                       f'target attribute <{self._target}> not found')
        sourceentity = self.targetmapping.sourceentity
        if type(self._source) is dict:
            # Expression source: every placeholder must resolve to a source attribute.
            am_expr = AttributeMappingExpression(sourceentity , expression = self._source.get('expression'), resulttype = None, alias = None)
            for attrname, attr in am_expr.get_expression_attributes().items():
                if attr is None:
                    errors.add("VALIDATION ERROR",
                               ("Mapping", "<" + self.targetmapping.sourceentityname + ">",
                                "target <" + self.targetmapping.targetentityname + ">"),
                               f'attribute <{attrname}> in sourceexpression "{am_expr._rawexpression}" not found')
        else: # only an attribute name was given:
            attr = sourceentity.get_attribute(self._source)
            if attr is None:
                errors.add("VALIDATION ERROR",
                           ("Mapping", "<" + self.targetmapping.sourceentityname + ">",
                            "target <" + self.targetmapping.targetentityname + ">"),
                           f'source attribute <{self._source}> not found')
        return errors
class TargetMapping:
    """Maps one source entity onto one target entity.

    Builds the list of AttributeMapping instances, either implicitly from the
    target's attributes (mappingmode 'implicit', filtered by the configured
    implicit roles) or purely from the explicit mapping list (mappingmode
    'explicit'). Explicit entries override implicit ones that share the same
    source or target attribute.
    """
    def __init__(self, model, sourceentityname: str, definition: dict = None):
        self.model = model
        self._definition = definition
        self.sourceentityname = sourceentityname
        self.targetentityname = definition.get('target')
        self.mappingmode = definition.get('mappingmode', 'implicit')
        self.type = definition.get('type', 'master')
        self.attributemappings = []
        self.explicitattributemappings = definition.get('mapping')
        # TODO: would be better placed in validation:
        if self.targetentity is None: # cf. @property
            logging.error('mapping: <%s> - target <%s> not found', self.sourceentityname, self.targetentityname)
            # Early return: validation still runs later, so at least the console output is correct.
            return
        if self.mappingmode == 'implicit':
            # get implicit roles from config.entitydefaults.
            implicit_roles = self.model.config.entitydefaults.get(self.targetentity.type).get('map_implicit_roles', 'base')
            for ta in self.targetentity.get_attributes(implicit_roles):
                # NOTE: does not cover the case where a later explicit mapping carries an expression
                self.attributemappings.append(AttributeMapping(self, ta.name, ta.name))
            logging.debug('Mapping <%s>: Created implicit attribute mappings for target <%s>: %s', self.sourceentityname,
                          self.targetentityname, self.attributemappings)
        if self.explicitattributemappings:
            logging.debug("explicit mappings: %s", self.explicitattributemappings)
            logging.debug("result mappings (1): %s", self.attributemappings)
            # Process all explicitly mapped attributes, overwriting matching implicit ones.
            for explicitattributemapping in self.explicitattributemappings:
                # Optional third element is the transformation name.
                transformationname = explicitattributemapping[2] if len(explicitattributemapping) == 3 else ''
                if self.mappingmode == 'explicit':
                    self.attributemappings.append(
                        AttributeMapping(self, explicitattributemapping[0], explicitattributemapping[1],
                                         transformationname))
                    continue
                # Find an existing (implicit) mapping with the same source or target attribute.
                # BUGFIX: the previous hand-rolled counter was not reset when a scan found no
                # match, so the next iteration started from a stale index and could replace
                # the wrong mapping or raise an IndexError (see the old FIXME about index
                # overflow with roles outside map_implicit_roles). enumerate() is exact.
                existing_index = next(
                    (idx for idx, am in enumerate(self.attributemappings)
                     if am._source == explicitattributemapping[0] or am._target == explicitattributemapping[1]),
                    None)
                if existing_index is not None:
                    logging.debug(
                        'Mapping <%s>: Replace implicit Mapping: %s at index %s with explicit mapping: %s',
                        self.sourceentityname, self.attributemappings[existing_index], existing_index,
                        explicitattributemapping)
                    self.attributemappings[existing_index] = AttributeMapping(self,
                                                                              explicitattributemapping[0],
                                                                              explicitattributemapping[1],
                                                                              transformationname)
                else:
                    logging.debug('Mapping <%s>: Adding explicit attributemapping %s', self.sourceentityname,
                                  explicitattributemapping)
                    self.attributemappings.append(
                        AttributeMapping(self,
                                         explicitattributemapping[0],
                                         explicitattributemapping[1],
                                         transformationname))
            logging.debug("result mappings (2): %s", self.attributemappings)
    @property
    def targetentity(self):
        """Target entity instance resolved from the model (None if unknown)."""
        return self.model.get_entity(self._definition.get('target'))
    @property
    def sourceentity(self):
        """Source entity instance resolved from the model (None if unknown)."""
        return self.model.get_entity(self.sourceentityname)
    def get_attribute_mappings(self):
        """All AttributeMapping instances of this target mapping."""
        return self.attributemappings
    def validate(self):
        """Validate all attribute mappings and reject duplicate explicit targets."""
        errors = ErrorCollection()
        # ------ Validation Attributes: ---------
        for am in self.get_attribute_mappings():
            errors.append(am.validate())
        # TODO: Check for data truncation (only useful once the data-type definitions are explicit/clean)
        # ------ Validation of explicit mappings: ---------
        if self.explicitattributemappings:
            target = []
            for explicitattributemapping in self.explicitattributemappings:
                if explicitattributemapping[1] not in target:
                    target.append(explicitattributemapping[1])
                else:
                    errors.add("VALIDATION ERROR",
                               ("Mapping", "<" + self.sourceentityname + ">",
                                "target <" + self.targetentityname + ">"),
                               f'More than one attribute from same source mapped to <{explicitattributemapping[1]}>')
        return errors
class Mapping:
    """All target mappings of a single source entity, loaded from one mapping file section."""
    def __init__(self, model, sourceentityname: str, filename: str, definition: dict = None):
        """definition: list of target-mapping dicts (one entry per target entity)."""
        self.model = model
        self.definition = definition
        self.targetmappings = {}
        self.sourceentityname = sourceentityname
        self.filename = filename
        self.type = 'mapping'
        # FIXME: Loading the TargetMappings in __init__ prevents a clean separation of validation and loading.
        for tm in self.definition:
            self.targetmappings[tm.get('target')] = TargetMapping(model, sourceentityname, tm)
    def get_attribute_mappings_by_target(self, targetentityname: str):
        """Attribute mappings for the given target; logs an error when the target has attributes but no mapping."""
        tm = self.get_targetmapping_by_target(targetentityname)
        am = tm.get_attribute_mappings()
        implicit_roles = self.model.config.entitydefaults.get(tm.targetentity.type).get('map_implicit_roles', 'base')
        if not am and tm.targetentity.get_attributes(implicit_roles): # links do not have to carry attributes, so only report an error if the link has an attribute
            logging.error(f"Mapping <%s>: No mapping for '{targetentityname}' found", self.sourceentityname)
        return am
    def get_target_entities(self):
        """returns list of direct mapped entites"""
        return [tm.targetentity for tm in self.targetmappings.values()]
    def validate(self):
        """Validate source entity, target entities, link completeness, then each target mapping."""
        errors = ErrorCollection()
        # ------ Validating Entity: ---------
        if self.model.get_entity(self.sourceentityname) is None:
            errors.add("VALIDATION ERROR",
                       (self.filename, "Mapping", "<" + self.sourceentityname + ">"),
                       "delivery <" + self.sourceentityname + "> not found")
            # Without the source entity no further checks are possible.
            return errors
        # ------ Validation of linked entities -----
        # Validation if all linked entities are present in the same mapping
        for tm in self.definition:
            e = self.model.get_entity(tm.get('target'))
            if e is None:
                errors.add("VALIDATION ERROR",
                           (self.filename, "Mapping", "<"+self.sourceentityname+">"),
                           "target <"+tm.get('target')+"> not found")
        if errors.count > 0:
            return errors
        targetentities = self.get_target_entities()
        links = (e for e in targetentities if e.type == 'link')
        for link in links:
            for le in link.get_linked_entities():
                if le not in targetentities:
                    errors.add("VALIDATION ERROR",
                               (self.filename, "Mapping", "<" + self.sourceentityname + ">"),
                               "linked entity for link <" + link.name + "> is missing. Please provide a mapping for <"+le.name+"> in this mapping")
                    # NOTE(review): returns after the FIRST missing linked entity, so the
                    # target-mapping validation below is skipped in that case — confirm
                    # this early exit is intended.
                    return errors
        # ------ Validating Targetmapping: ---------
        for name, tm in self.targetmappings.items():
            errors.append(tm.validate())
        return errors
    def get_targetmapping_by_target(self, target: str):
        """TargetMapping for the given target entity name (None if unknown)."""
        return self.targetmappings.get(target)
    def get_targetmappings(self):
        """All TargetMappings keyed by target entity name."""
        return self.targetmappings
+923
View File
@@ -0,0 +1,923 @@
#from DataVaultGenerator.Entities.Derived import Derived
from tokenize import String
from DataVaultGenerator.Entities.SubDag import SubDag
import logging
import glob
import time
from pathlib import Path, PurePath
from shutil import Error, copy2
from rich.progress import Progress
from rich.progress import TextColumn, BarColumn, SpinnerColumn, TaskProgressColumn, TimeElapsedColumn
from rich.tree import Tree
from rich.panel import Panel
from rich.table import Table
from rich.rule import Rule
from rich import box
from rich.style import Style
from rich import print
import yaml
try:
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader
import sys
import hashlib
import re
import os
import subprocess
from os import makedirs
from cerberus import Validator, schema_registry, rules_set_registry
from jinja2 import Environment, FileSystemLoader, TemplateNotFound, UndefinedError
from datetime import datetime
from DataVaultGenerator.Config import Config
from DataVaultGenerator.Components import ErrorCollection
from DataVaultGenerator.Components import Layer, log
from DataVaultGenerator.Entities.Composite import Composite
from DataVaultGenerator.Entities.Delivery import Delivery
from DataVaultGenerator.Entities.GenericTable import GenericTable
from DataVaultGenerator.Entities.GenericTransformation import GenericTransformation
from DataVaultGenerator.Entities.GenericTask import GenericTask
from DataVaultGenerator.Entities.Hub import Hub
from DataVaultGenerator.Entities.Interface import Interface
from DataVaultGenerator.Entities.Link import Link
from DataVaultGenerator.Entities.PIT import PIT
from DataVaultGenerator.Entities.Bridge import Bridge
from DataVaultGenerator.Entities.Reference import Reference
from DataVaultGenerator.Entities.Report import Report
from DataVaultGenerator.Entities.Satellite import Satellite
from DataVaultGenerator.Entities.Sourcesystem import SourceSystem
from DataVaultGenerator.Entities.View import View
from DataVaultGenerator.Entities.SubDag import SubDag
from DataVaultGenerator.Mapping import Mapping
from DataVaultGenerator.Config import ConfigDict
from DataVaultGenerator.Dag import Dag,DagNode
from DataVaultGenerator import __version__
class Model:
    """Central registry of the Data Vault model.

    Holds all entities, mappings, source systems, interfaces and sub-dags,
    loads their YAML definitions, validates them against cerberus schemas and
    renders Jinja2 templates into the configured output folder.
    """
    def __init__(self):
        # NOTE: __init__ already reads the schema/type YAML files from disk.
        logging.info('Init Model')
        self.config = Config()
        self.entities = {}              # name -> entity instance (namespace 'model')
        self.mappings = {}              # source entity name -> Mapping
        self.sourcesystems = {}         # name -> SourceSystem
        self.subdags = {}               # name -> SubDag
        self.interfaces = {}            # name -> Interface
        self.layer = {}                 # layer id -> Layer
        self.basetemplates = {}         # template key -> compiled Jinja2 template
        self.cdc = {}                   # change capture state: output filename -> checksum/change info
        self.types = {}                 # entity type name -> type definition (from schema/*.yaml)
        self.dag = Dag(self)
        self.sys_specifications = {}    # key -> validated sys_specification YAML
        self.load_schema()
        self.load_types()
        self.validator = Validator(allow_unknown=False)
@property
def name(self):
return self.config.model.name
    def load_schema(self):
        """Load shared cerberus schema fragments from schema/registry/*.yaml.

        Definitions with type 'schema' are registered in the schema_registry,
        everything else is registered as a rules set.
        """
        modpath = os.path.dirname(__file__)
        for f in glob.glob(os.path.join(modpath, "schema/registry/*.yaml"), recursive=True):
            with open(f, 'r') as file:
                definition = yaml.load(file, Loader=Loader)
                if definition.get('type') == 'schema':
                    schema_registry.add(definition.get('name'),definition.get('schema'))
                else:
                    rules_set_registry.add(definition.get('name'),definition.get('schema'))
def load_types(self):
modpath = os.path.dirname(__file__)
for f in glob.glob(os.path.join(modpath, "schema/entities/*.yaml"), recursive=False):
with open(f, 'r') as file:
definition = yaml.load(file, Loader=Loader)
self.types[definition.get('type')] = definition
for f in glob.glob(os.path.join(modpath, "schema/*.yaml"), recursive=False):
with open(f, 'r') as file:
definition = yaml.load(file, Loader=Loader)
self.types[definition.get('type')] = definition
def get_types(self):
return self.types
def get_type_property(self, type, property):
return self.types.get(type,{}).get(property,'')
def get_boilerplate(self,type):
return self.get_type_property(type,'boilerplate')
def get_subtypes(self):
return {'base': {'displayname': 'Base/Default'},
'drivingkeystatus': {'displayname': 'Status-Satellite for Driving Key'},
'fact': {'displayname': 'Fact Table/View'},
'dimension': {'displayname': 'Dimension Table/View'},
'': {'displayname': 'n/a'}
}
def get_type_displayname(self, entity_type):
return self.get_types().get(entity_type, {}).get('displayname', 'Unknown Type: ' + entity_type)
def get_entities(self, generatable_only: bool = True ):
if generatable_only:
return {k:v for k, v in self.entities.items() if v.generate == 1}
else:
return self.entities
def get_entities_by_type(self, entity_type: str, generatable_only: bool = True):
if generatable_only:
return [e for e in self.entities.values() if e.type == entity_type and e.generate == 1]
else:
return [e for e in self.entities.values() if e.type == entity_type]
    def get_entity(self, name: str):
        """Entity instance by name (None if unknown)."""
        return self.entities.get(name)
    def get_layers(self):
        """All configured layers, keyed by layer id."""
        return self.layer
    def get_layer(self, name: str):
        """Layer by id (None if unknown)."""
        return self.layer.get(name)
    def get_source_systems(self):
        """All source systems, keyed by name."""
        return self.sourcesystems
    def get_source_system(self, name: str):
        """Source system by name (None if unknown)."""
        return self.sourcesystems.get(name)
    def get_subdags(self):
        """All sub-dags, keyed by name."""
        return self.subdags
    def get_subdag(self, name: str):
        """Sub-dag by name (None if unknown)."""
        return self.subdags.get(name)
    def get_interfaces(self):
        """All source interfaces, keyed by name."""
        return self.interfaces
    def get_interface(self, name: str):
        """Interface by name (None if unknown)."""
        return self.interfaces.get(name)
    def get_interface_by_source_system(self, sourcesystem):
        """All interfaces belonging to the given source system instance."""
        return [i for i in self.interfaces.values() if i.source_system == sourcesystem]
    def get_mapping(self, name: str):
        """Mapping for the given source entity name (None if unknown)."""
        return self.mappings.get(name)
    def get_mappings(self):
        """All mappings, keyed by source entity name."""
        return self.mappings
    def load_config(self, filename):
        """Load and validate the model configuration, then set up the Jinja2
        environment, layers, base templates and sys specifications.

        Exits the process (exit code 2) on a missing base template or on a
        missing/invalid sys_specification file.
        """
        logging.info('reading config from %s', filename)
        self.config.load(filename, self.get_type_property('config','schema'), self.validate_definition)
        self.templateEnvironment = Environment(
            loader=FileSystemLoader(self.config.path.joinpath(self.config.paths.templates)),
            **self.config.jinja.environment
        )
        # Expose generator version and current time to all templates.
        self.templateEnvironment.globals['generator_version'] = __version__
        self.templateEnvironment.globals['now'] = datetime.now
        # unfold vars: each config var becomes a template global named _<var>_
        for k,v in self.config.vars.items():
            self.templateEnvironment.globals['_'+k+'_'] = v
        for layerid, layerdefinition in self.config.layer.items():
            self.layer[layerid] = Layer(self, layerid, layerdefinition)
        # NOTE(review): the loop variable below shadows the 'filename' parameter.
        for templatekey, filename in self.config.basetemplates.items():
            try:
                # The configured filename may itself be a template expression.
                templatefilename = self.templateEnvironment.from_string(filename).render(model=self)
                self.basetemplates[templatekey] = self.templateEnvironment.get_template(templatefilename)
            except TemplateNotFound:
                print(f"Config: Base-Template {templatefilename} not found.")
                logging.error(f"Config: Base-Template {templatefilename} not found.")
                sys.exit(2)
        for k,v in self.config.sys_specification.items():
            folder = self.config.path.joinpath(v)
            try:
                with open(folder, 'r') as file:
                    specyaml = yaml.load(file, Loader=Loader)
                    is_valid = self.validate_definition('sys_specification definition', self.get_type_property('sys_specification','schema'), specyaml)
                    if is_valid:
                        self.sys_specifications[k] = specyaml
                    else:
                        print(f"Config: sys_specification definition {folder} is not valid.")
                        logging.error(f"Config: sys_specification definition {folder} is not valid.")
                        sys.exit(2)
            except FileNotFoundError as e:
                print(f"Config: sys_specification {folder} not found.")
                logging.error(e)
                sys.exit(2)
            except yaml.scanner.ScannerError as e:
                print("")
                logging.error(e)
                sys.exit(2)
    def get_config(self):
        """The Config instance of this model."""
        return self.config
    def get_file_content(self, filename: str):
        """Read and return the whole content of a text file."""
        with open(filename, 'r') as file:
            return file.read()
    def save_file_content(self, filename: str, content):
        """Write *content* to a text file (overwrites), echoing the target path."""
        print('Saving content to file: ' + filename)
        with open(filename, 'w') as file:
            file.write(content)
    def create_entity(self, f, entityyml):
        """Factory: instantiate the concrete entity class for a YAML definition.

        Validates the definition against the type's schema first (exits the
        process on schema errors). Returns None for unknown entity types.
        """
        entitytype = entityyml.get('type')
        #FIXME: evaluate this one process step earlier
        schema = self.get_type_property(entitytype,'schema')
        if schema:
            self.validate_definition(entityyml.get('type') + ': ' + entityyml.get('name'), schema, entityyml)
        # Dispatch table: entity type name -> entity class.
        classmap = {
            'hub': Hub
            ,'delivery': Delivery
            ,'satellite':Satellite
            ,'link':Link
            ,'view':View
            ,'pit':PIT
            ,'bridge':Bridge
            ,'reference':Reference
            ,'sourcesystem':SourceSystem
            ,'source':Interface
            ,'generictable':GenericTable
            ,'generictransformation':GenericTransformation
            ,'generictask':GenericTask
            ,'report': Report
            ,'composite': Composite
            ,'subdag': SubDag
        }
        if entitytype in classmap:
            return classmap[entitytype](self, f, entityyml)
        else:
            return None
    def add_entity(self, entity):
        """Add a new entity. If an entity with the same name exists, it is kept and an error is logged."""
        if entity.name not in self.entities:
            self.entities[entity.name] = entity
        else:
            logging.error('entity %s already exists in model', entity.name)
        #if entity.type in ('delivery'):
        #    derived = Derived(self,'',entity,{'derived_from_type': 'delivery'})
        #    self.entities[derived.name] = derived
    def update_entity(self, entity):
        """replace an existing entity with a new one."""
        self.entities[entity.name] = entity
    def load_entity_from_file(self, filename):
        # Not implemented yet.
        pass
    def validate_definition(self, title, schema, definition, allow_unknown=False, failonerror=True):
        """Validate *definition* against *schema* using cerberus.

        On failure, prints a rich error tree titled *title*, logs the errors,
        and exits the process (exit code 2) when *failonerror* is set.
        Returns True when valid, False otherwise.
        """
        self.validator.allow_unknown = allow_unknown
        if not self.validator.validate(definition, schema):
            tree = Tree(title)
            for field, errors in self.validator.errors.items():
                for e in errors:
                    fieldtree = tree.add(field)
                    if type(e) is dict:
                        # Nested errors (list fields): one sub-tree per list item.
                        for itemno, itemerrors in e.items():
                            itemtree = fieldtree.add('item' + str(itemno))
                            for itemerror in itemerrors:
                                if type(itemerror) is dict:
                                    for fieldname, fielderror in itemerror.items():
                                        itemtree.add(fieldname + ': ' + str(fielderror))
                                else:
                                    itemtree.add(str(itemerror))
                    else:
                        fieldtree.add(str(e))
            print(Panel(tree, title="[red]SCHEMA ERROR", expand=False,padding=1 ))
            logging.error(self.validator.errors)
            if failonerror:
                sys.exit(2)
            return False
        return True
    def validate_entities_schemas(self):
        """Schema-validate every entity YAML file; returns the number of invalid files.

        Unlike load_entities, this does not instantiate entities and does not
        exit on schema errors (only on YAML parse/encoding errors).
        """
        errorcount = 0
        # NOTE(review): uses self.config.paths.entities directly while load_entities
        # resolves it via self.config.path.joinpath(...) — confirm both resolve to
        # the same folder relative to the working directory.
        folder = self.config.paths.entities
        for f in glob.glob(folder + "/**/*.yaml", recursive=True):
            try:
                with open(f, 'r') as file:
                    entityyml = yaml.load(file, Loader=Loader)
                    # Minimal base schema: every entity needs a name and a known type.
                    baseschema = {'name': {'type': 'string', 'required': True}
                        , 'type': {'required': True,
                                   'allowed': list(self.get_types().keys())}
                                  }
                    valid_base = self.validate_definition(f, baseschema, entityyml, True, False)
                    if valid_base:
                        entitytype = entityyml.get('type')
                        schema = self.get_type_property(entitytype, 'schema')
                        if schema:
                            valid = self.validate_definition(entityyml.get('type') + ': ' + entityyml.get('name'),
                                                             schema, entityyml, False, False)
                            if not valid:
                                errorcount += 1
                    else:
                        errorcount += 1
            except (yaml.scanner.ScannerError, UnicodeDecodeError) as e:
                print("")
                logging.error(e)
                sys.exit(2)
        return errorcount
    def load_entities(self):
        """Load all entity YAML files, validate them and register them by namespace.

        Files whose name starts with the configured ignore prefix are skipped.
        Exits the process (exit code 2) on empty documents, parse errors or
        base-schema violations.
        """
        folder = self.config.path.joinpath(self.config.paths.entities)
        logging.info('reading entities from: %s', folder)
        baseschema = {'name': {'type': 'string', 'required': True}
            , 'type': {'required': True,
                       'allowed': list(self.get_types().keys())}
            , 'description': {'type': 'string'}
            , 'subtype': {'type': 'string'}
            , 'generate': {'type': 'integer', 'allowed': [0, 1]}
                      }
        numfiles = len(list(folder.glob('**/*.yaml')))
        #i = 0
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(style=Style(color="green")),
            TimeElapsedColumn()
        ) as progress:
            task1 = progress.add_task("[blue]Loading: ", total=numfiles)
            for f in sorted(folder.glob('**/*.yaml')):
                if f.name.startswith(self.config.model.ignore_file_prefix):
                    logging.info('ignore file because of prefix: %s', f.relative_to(folder))
                    continue
                logging.info('reading entity: %s', f.relative_to(folder))
                try:
                    with open(f, 'r') as file:
                        entityyml = yaml.load(file, Loader=Loader)
                        if not entityyml:
                            print("")
                            logging.error('document empty: ' + f.name)
                            print('document empty: ', f.name)
                            sys.exit(2)
                        self.validate_definition(f.name, baseschema, entityyml, True)
                except (yaml.scanner.ScannerError, UnicodeDecodeError) as e:
                    print("")
                    logging.error(e)
                    sys.exit(2)
                entitytype = entityyml.get('type')
                # The type's namespace decides which registry the entity goes into.
                namespace = self.get_type_property(entitytype, 'namespace')
                if namespace == 'model':
                    self.add_entity(self.create_entity(f, entityyml))
                if namespace == 'sourcesystem':
                    entityname = entityyml.get('name')
                    # IMPROVE: implementation inconsistent with entities -> self.add_entity
                    self.sourcesystems[entityname] = self.create_entity(f, entityyml)
                if namespace == 'source':
                    entityname = entityyml.get('name')
                    # IMPROVE: implementation inconsistent with entities -> self.add_entity
                    self.interfaces[entityname] = self.create_entity(f, entityyml)
                if namespace == 'dag':
                    entityname = entityyml.get('name')
                    # IMPROVE: implementation inconsistent with entities -> self.add_entity
                    self.subdags[entityname] = self.create_entity(f, entityyml)
                progress.update(task1, advance=1)
    def load_mappings(self):
        """Load all mapping YAML files and build Mapping objects keyed by source entity name.

        Exits the process (exit code 2) on YAML parse errors.
        """
        folder = self.config.path.joinpath(self.config.paths.mappings)
        logging.info('reading mappings from %s', folder)
        schema = self.get_type_property('mapping', 'schema')
        for f in sorted(folder.glob('**/*.yaml')):
            logging.info('reading mapping: %s', f.relative_to(folder))
            try:
                with open(f, 'r') as file:
                    mappingyml = yaml.load(file, Loader=Loader)
                    # presumably wrapped under 'root' so the schema can address the whole document — verify against the mapping schema file
                    self.validate_definition('Mapping: ' + f.name, schema, {'root': mappingyml})
                    for sourceentityname in mappingyml.keys():
                        self.mappings[sourceentityname] = Mapping(self, sourceentityname, f.name, mappingyml[sourceentityname])
            except yaml.scanner.ScannerError as e:
                print("")
                logging.error(e)
                sys.exit(2)
    def validate_mappings(self):
        """Validate every Mapping, print each error as a rich tree, return the ErrorCollection."""
        errors = ErrorCollection()
        mapping_count = len(self.mappings)
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            TimeElapsedColumn(),
        ) as progress:
            task1 = progress.add_task("[blue]Validating Mappings: ", total=mapping_count)
            for m in self.get_mappings().values():
                errors.append(m.validate())
                progress.update(task1, advance=1)
        # NOTE(review): relies on an 'errors' accessor of ErrorCollection not visible
        # in this chunk (only _errors/count are) — confirm it exists.
        for rm in errors.errors: # TODO: merge message-paths
            log(logging.ERROR, rm.get('title'), rm.get('path'), rm.get('message'), True)
        if errors.count != 0:
            logging.error('%i errors found while validating mappings.', errors.count)
        return errors
    def validate_entities(self):
        """Validate all entities, interfaces and sub-dags; print errors, return the ErrorCollection."""
        errors = ErrorCollection()
        entity_count = len(self.entities) + len(self.interfaces) + len(self.subdags)
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            TimeElapsedColumn(),
        ) as progress:
            task1 = progress.add_task("[blue]Validating Entities: ", total=entity_count)
            for e in self.entities.values():
                progress.update(task1, advance=1)
                errors.append(e.validate())
            for e in self.get_interfaces().values():
                progress.update(task1, advance=1)
                errors.append(e.validate())
            for e in self.get_subdags().values():
                progress.update(task1, advance=1)
                errors.append(e.validate())
        #for rm in result['messages']:
        #    for p in rm.get('path'):
        #        print(p)
        #    print(rm.get('path'), rm.get('message'))
        for rm in errors.errors: # TODO: merge message-paths
            log(logging.ERROR, rm.get('title'), rm.get('path'), rm.get('message'), True)
        if errors.count != 0:
            logging.error('%i errors found while validating entities.', errors.count)
        return errors
    def build_dag(self):
        """Build the dependency DAG: one node per model-namespace entity, one edge per component dependency."""
        logging.info('building dag... ')
        #TODO: add a way to ignore an entity in the dag (on entity level)
        entity_count = len(self.get_entities().keys())
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            TimeElapsedColumn(),
        ) as progress:
            task1 = progress.add_task("[blue]Building Dag: ", total=entity_count)
            for i, (k,e) in enumerate(self.get_entities().items()):
                progress.update(task1, advance=1)
                namespace = self.get_type_property(e.type, 'namespace')
                if namespace =='model':
                    logging.info('adding node: %s', e.name)
                    self.dag.add_node(DagNode(e.name,e))
                    for c in e.get_component_entities():
                        c_namespace = self.get_type_property(c.get('type'), 'namespace')
                        if c_namespace == 'model':
                            # Edge direction: component -> depending entity.
                            logging.info('adding edge: %s -> %s', c.get('component').name, e.name)
                            self.dag.add_edge((c.get('component').name, e.name))
    def render_entity_templates(self):
        """Render all configured templates per entity/interface/sub-dag into the output folder.

        Files are only (re)written when their checksum differs from the
        captured previous state (see capture_changes_before); change state is
        recorded in self.cdc.
        """
        targetroot = self.config.path.joinpath(self.config.paths.output)
        logging.info('render entity templates to: %s', targetroot)
        entity_count = len(self.entities.keys()) +len(self.interfaces.keys()) + len(self.subdags.keys())
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            TimeElapsedColumn(),
        ) as progress:
            task1 = progress.add_task("[blue]Generating Entities: ", total=entity_count)
            # NOTE(review): '|' unions the three dict item views; iteration order of the
            # union is not the insertion order — confirm order independence is intended.
            for i, (k,entity) in enumerate(self.entities.items() | self.interfaces.items() | self.subdags.items() ) :
                progress.update(task1, advance=1)
                if entity.generate == 0:
                    logging.info('skipping Entity %s (generate=0)', entity.name)
                    continue
                if self.config.generator.get(entity.type.lower()):
                    for templateconfig in self.config.generator.get(entity.type.lower()).get('templates'):
                        if entity.subtype in templateconfig.get('subtype', 'base'):
                            targetfolder = templateconfig['targetfolder']
                            outfile = templateconfig['filename']
                            templatefilename = templateconfig['template']
                            synchtarget = templateconfig.get('synchtarget','')
                            #Parse targetfolder and -filename templates:
                            templatefilename = self.templateEnvironment.from_string(templatefilename).render(entity=entity, model=self)
                            targetfolder = self.templateEnvironment.from_string(targetfolder).render(entity=entity, model=self)
                            outfile = self.templateEnvironment.from_string(outfile).render(entity=entity, model=self)
                            synchtarget = self.templateEnvironment.from_string(synchtarget).render(entity=entity, model=self)
                            targetfolder = targetroot.joinpath(targetfolder)
                            makedirs(targetfolder, exist_ok=True)
                            filename = targetfolder.joinpath(outfile)
                            logging.info('rendering Entity %s with template "%s" to %s ...',
                                         entity.name, templatefilename, filename.relative_to(targetroot))
                            output = entity.render_template(templatefilename)
                            if output: # empty template results are not written to a file
                                checksum = hashlib.md5(bytes(output,encoding ='utf-8')).hexdigest()
                                if not self.cdc.get(filename):
                                    self.cdc[filename] = dict(changed='new')
                                self.cdc[filename]['current'] = checksum
                                if checksum != self.cdc.get(filename,{}).get('previous'): # Only write file if content has changed
                                    with open(filename, "w") as file:
                                        file.write(output)
                                    if self.cdc[filename]['changed'] != 'new':
                                        self.cdc[filename]['changed'] = 'update'
                                else:
                                    self.cdc[filename]['changed'] = 'same'
                                if synchtarget:
                                    synchtarget = self.config.path.joinpath(synchtarget)
                                    self.cdc[filename]['synchto'] = synchtarget
                            else:
                                logging.info('skipping Entity %s with template "%s" because the result is empty', entity.name,
                                             templatefilename)
    def render_model_templates(self):
        """Render the model-level templates into the output folder with CDC handling.

        Exits the process (exit code 2) when a template is missing or renders
        with undefined variables.
        """
        targetroot = self.config.path.joinpath(self.config.paths.output)
        logging.info('render model templates to: %s', targetroot)
        entity_count = len(self.config.generator['model']['templates'])
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            TimeElapsedColumn(),
        ) as progress:
            task1 = progress.add_task("[blue]Generating Model: ", total=entity_count)
            for templateconfig in self.config.generator['model']['templates']:
                progress.update(task1, advance=1)
                targetfolder = templateconfig['targetfolder']
                outfile = templateconfig['filename']
                synchtarget = templateconfig.get('synchtarget','')
                #Parse targetfolder and -filename templates:
                # NOTE(review): this mutates the config entry in place, so the template
                # name is only resolved once per process — confirm that is intended.
                templateconfig['template'] = self.templateEnvironment.from_string(templateconfig['template']).render(model=self)
                targetfolder = self.templateEnvironment.from_string(targetfolder).render(model=self)
                outfile = self.templateEnvironment.from_string(outfile).render(model=self)
                synchtarget = self.templateEnvironment.from_string(synchtarget).render(model=self)
                targetfolder = targetroot.joinpath(targetfolder)
                makedirs(targetfolder, exist_ok=True)
                filename = targetfolder.joinpath(outfile)
                logging.info('rendering Model to %s ...', filename.relative_to(targetroot))
                try:
                    template = self.templateEnvironment.get_template(templateconfig['template'])
                    output = template.render(
                        model=self,
                        templatename=templateconfig['template']
                    )
                    if output: # empty template results are not written to a file
                        checksum = hashlib.md5(bytes(output,encoding ='utf-8')).hexdigest()
                        if not self.cdc.get(filename):
                            self.cdc[filename] = dict(changed='new')
                        self.cdc[filename]['current'] = checksum
                        if checksum != self.cdc.get(filename,{}).get('previous'): # Only write file if content has changed
                            with open(filename, "w") as file:
                                file.write(output)
                            if self.cdc[filename]['changed'] != 'new':
                                self.cdc[filename]['changed'] = 'update'
                        else:
                            self.cdc[filename]['changed'] = 'same'
                        if synchtarget:
                            synchtarget = self.config.path.joinpath(synchtarget)
                            self.cdc[filename]['synchto'] = synchtarget
                except TemplateNotFound:
                    print("")
                    print(Panel(f"[red]Error while rendering model-templates[/red]: Template {templateconfig['template']} not found.", title="[red]RENDER ERROR", padding=1,title_align="left" ))
                    #print(f"Config: Template {templateconfig['template']} not found.")
                    logging.error(f"Template {templateconfig['template']} not found.")
                    sys.exit(2)
                except UndefinedError as e:
                    print("")
                    logging.error(f"Error while rendering model with Template {templateconfig['template']} :")
                    logging.error(e)
                    print(f"Error while rendering model with Template {templateconfig['template']} :", e)
                    sys.exit(2)
    def run_model_hooks(self, type):
        """Run the configured shell hooks and echo their stdout/stderr."""
        # type = 'pre_hooks', 'post_hooks'
        if self.config.get(type):
            for hookname, hook in self.config.get(type,{}).items():
                print('---------------------------------------------------------------------------------')
                print('hook:', hookname)
                print('---------------------------------------------------------------------------------')
                # SECURITY: shell=True executes the configured command line through the shell.
                # Hook strings come from the local config file — treat that file as trusted input only.
                result = subprocess.run(hook, capture_output=True, text=True,shell=True)
                print(result.stdout)
                print(result.stderr)
    def capture_changes_before(self):
        """Checksum every file currently in the output folder as the 'previous' CDC state.

        Each file starts as 'deleted'; rendering later promotes it to 'new',
        'same' or 'update', or it stays 'deleted' and gets removed afterwards.
        """
        targetroot = self.config.path.joinpath(self.config.paths.output)
        self.cdc = {}
        for f in sorted(targetroot.glob('**/*.*')):
            filename = f #.relative_to(targetroot)
            with open(f, 'r') as inputfile:
                checksum = hashlib.md5(inputfile.read().encode('UTF-8')).hexdigest()
                self.cdc[filename] = {'current': '',
                                      'previous': checksum,
                                      'changed': 'deleted' # set changed to 'deleted' - when rendering state changes to 'new', 'same', 'update' or stays 'deleted'
                                      }
        self.cdc_time= time.time()
        #print(self.cdc)
    def capture_changes_after(self):
        """Remove stale output files and relabel matching deleted/new pairs as renames."""
        #delete file not in output anymore
        for filename, info in self.cdc.items():
            if info.get('changed') == 'deleted':
                filename.unlink()
        #check for renamed files (compare new and deleted files)
        # A 'deleted' file whose previous checksum equals a 'new' file's current
        # checksum is treated as a rename of that file.
        for filename, info in self.cdc.items():
            if info.get('changed') == 'deleted':
                for f, i in self.cdc.items():
                    if i.get('changed') == 'new' and info.get('previous') == i.get('current'):
                        self.cdc[filename]['changed'] = 'renamed' # change 'deleted' to 'renamed'
                        self.cdc[filename]['newname'] = f
                        self.cdc[f]['changed'] = 'renametarget' # change 'new' to 'renametarget'
                        self.cdc[f]['oldname'] = filename
    def display_changes(self):
        """Print a rich table of updated/new/deleted/renamed output files (CDC result)."""
        table = Table(show_edge=False, box=box.MINIMAL)
        table.add_column("State", justify="right", no_wrap=True)
        table.add_column("File", style="white")
        update = [f for f, i in self.cdc.items() if i.get('changed') == 'update']
        new = [f for f, i in self.cdc.items() if i.get('changed') == 'new']
        deleted = [f for f, i in self.cdc.items() if i.get('changed') == 'deleted']
        renamed = {f:i for f, i in self.cdc.items() if i.get('changed') == 'renamed'}
        for f in update:
            table.add_row("[yellow]updated",str(f.relative_to(os.getcwd())))
        for f in new:
            table.add_row("[green]new",str(f.relative_to(os.getcwd())))
        for f in deleted:
            table.add_row("[red]deleted",str(f.relative_to(os.getcwd())))
        for f, i in renamed.items():
            table.add_row("[blue]renamed",str(f.relative_to(os.getcwd())) + " => "+ str(i.get('newname').relative_to(os.getcwd())) )
        if table.row_count > 0:
            print(table)
        else:
            print("[grey]No files changed.")
    #    for state in ('update','new','deleted','renamed'):
    #        print(state + ': ')
    #        for k, v in self.cdc.items():
    #            if v.get('changed') == state:
    #                if state == 'renamed':
    #                    print("    {} => {}".format(k.relative_to(os.getcwd()), v.get('newname').relative_to(os.getcwd())))
    #                else:
    #                    print("    {}".format(k.relative_to(os.getcwd())))
    #
    #        print(' ')
    def synch(self, fullsynch = False):
        """Copy output files with a 'synchto' target to their sync destination.

        By default only changed files ('update', 'new', 'renamed') are copied;
        fullsynch copies everything that has a sync target.
        """
        logging.info('Synching to targets: ...' )
        if fullsynch:
            items = [(k, v.get('synchto') ) for k, v in self.cdc.items() if v.get('synchto')]
        else:
            items = [(k, v.get('synchto') ) for k, v in self.cdc.items() if v.get('synchto') and v.get('changed') in ('update','new','renamed') ]
        #i = 0
        for item in items:
            #i += 1
            #print_progressbar(i, len(items), prefix='Synching to Target: ', suffix='Complete') #TODO: switch to rich.progress
            logging.info("Copy {} to {}".format(item[0].relative_to(os.getcwd()), item[1] ))
            makedirs( item[1], exist_ok=True)
            copy2(item[0], item[1])
def get_parsed_query(self, entity, rawquery):
""" Parses Querystrings like: Select * from {entityname1} join {entityname2}
and returns a parsed query like Select * from [database].[dbo].[entityname1]
join [database].[dbo].[entityname2] """
parsed_result = rawquery
for placeholder, queryentity in self.get_query_entities(rawquery).items():
if queryentity:
include_db = False if entity.dbentity.database == queryentity.dbentity.database else True
parsed_result = parsed_result.replace('{' + str(placeholder) + '}',
queryentity.dbentity.get_qualifier(include_db))
return parsed_result
def get_query_entities(self, rawquery):
""" Parses Querystrings like: Select * from {entityname1} join {entityname2}
and returns a list of entity instances. """
regex = r"\{(.*?)?\}"
entities = {}
matches = re.finditer(regex, rawquery, re.MULTILINE)
for matchNum, match in enumerate(matches):
for groupNum in range(0, len(match.groups())):
entities[match.group(1)] = self.get_entity(match.group(1))
return entities
def get_entity_name_suggestion(self, entity_type: list, name: str, maxdist: int = 5) -> String:
suggest = None
dist = maxdist
for e in self.entities.values():
if e.type in entity_type:
t = self.get_levenshtein_distance(name, e.name)
if t < dist:
suggest = e.name
dist = t
if t == 1:
return suggest
#print(name, e, t)
return suggest
def get_levenshtein_distance(self, word1, word2) -> int:
word2 = word2.lower()
word1 = word1.lower()
matrix = [[0 for x in range(len(word2) + 1)] for x in range(len(word1) + 1)]
for x in range(len(word1) + 1):
matrix[x][0] = x
for y in range(len(word2) + 1):
matrix[0][y] = y
for x in range(1, len(word1) + 1):
for y in range(1, len(word2) + 1):
if word1[x - 1] == word2[y - 1]:
matrix[x][y] = min(
matrix[x - 1][y] + 1,
matrix[x - 1][y - 1],
matrix[x][y - 1] + 1
)
else:
matrix[x][y] = min(
matrix[x - 1][y] + 1,
matrix[x - 1][y - 1] + 1,
matrix[x][y - 1] + 1
)
return matrix[len(word1)][len(word2)]
    def create_snapshot(self, filename: str):
        """Dump the raw definitions of all entities, interfaces and subdags
        into a single multi-document YAML snapshot file.

        :param filename: path of the snapshot file that is (over)written.
        """
        # NOTE(review): 'snaphot' is a typo in the log message — fix in a
        # code change, a doc-only edit must not alter runtime strings.
        logging.info('creating snaphot: %s', filename)
        # Rich progress bar: pure console feedback while collecting the docs.
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            TimeElapsedColumn(),
        ) as progress:
            entity_count = len(self.entities.keys()) +len(self.interfaces.keys()) + len(self.subdags.keys())
            task1 = progress.add_task("[blue]Generating Snapshot: ", total=entity_count)
            with open(filename, 'w') as file:
                docs = list()
                # NOTE(review): the '|' union of dict item views has *set*
                # semantics: the resulting document order is not deterministic
                # and every (key, value) pair must be hashable. Confirm whether
                # snapshot consumers rely on a stable ordering before relying
                # on this file in diffs.
                for i, (k,entity) in enumerate(self.entities.items() | self.interfaces.items() | self.subdags.items() ):
                    docs.append(entity._definition)
                    progress.update(task1, advance=1)
                # Write all collected definitions as one YAML document stream.
                yaml.dump_all(
                    docs,
                    file,
                    default_flow_style=False,
                    explicit_start=False,
                    sort_keys=False
                )
def load_snapshot(self, filename):
documents = dict()
with open(filename, 'r') as file:
for obj in list( yaml.load_all(file, Loader=yaml.FullLoader) ):
documents[obj.get('name')] = obj
print(documents)
+1
View File
@@ -0,0 +1 @@
__version__ = '1.1.5'
+129
View File
@@ -0,0 +1,129 @@
#!/usr/bin/env python
import sys
import argparse
import logging
from DataVaultGenerator.Components import ErrorCollection
from DataVaultGenerator.Model import Model
from DataVaultGenerator import __version__
from rich import print
from rich.table import Table
from rich.panel import Panel
from rich import box
MIN_PYTHON = (3, 9)
def main(argv=None):
    """Command-line entry point of the DataVaultGenerator.

    Parses the command line, prints the banner, configures logging, loads
    the model and runs validation / generation / synchronization / hooks
    according to the given switches.

    :param argv: argument list to parse (defaults to ``sys.argv[1:]``).
    """
    if argv is None:
        argv = sys.argv[1:]
    if sys.version_info < MIN_PYTHON:
        sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON)
    parser = argparse.ArgumentParser(description='DataVaultGenerator')
    parser.add_argument('config', help='Path of Config file')
    parser.add_argument('-l','--loglevel', default='INFO', help='Loglevel: CRITICAL, ERROR, WARNING, INFO, DEBUG (default: %(default)s)')
    parser.add_argument('-lf','--logfile', default='generator.log', help='Logfilename (default: %(default)s)')
    parser.add_argument('-v', '--validate', dest='validateonly', help='Switch to run validation only', action='store_true')
    parser.add_argument('--validateschema', dest='validateschemaonly', help='Switch to run validation of schema only',action='store_true')
    parser.add_argument('--novalidate', help='Switch to skip validation',action='store_true')
    parser.add_argument('--synch', help='Synchronize changed files to the target',action='store_true')
    parser.add_argument('--fullsynch', help='Synchronize all files to the target',action='store_true')
    parser.add_argument('--runhooks', help='Run pre- and post-hooks',action='store_true')
    parser.add_argument('--snapshot', help='Create Snapshotfile',action='store_true')
    # BUGFIX: argv was normalized above but never handed to argparse, so the
    # main(argv=...) parameter had no effect; parse the explicit list.
    args = parser.parse_args(argv)
    title = """\
 _____ _ __ __ _ _ _____ _
| __ \ | | \ \ / / | | | / ____| | |
| | | | __ _| |_ __ \ \ / /_ _ _ _| | |_ | | __ ___ _ __ ___ _ __ __ _| |_ ___ _ __
| | | |/ _` | __/ _` \ \/ / _` | | | | | __| | | |_ |/ _ \ '_ \ / _ \ '__/ _` | __/ _ \| '__|
| |__| | (_| | || (_| |\ / (_| | |_| | | |_ | |__| | __/ | | | __/ | | (_| | || (_) | |
|_____/ \__,_|\__\__,_| \/ \__,_|\__,_|_|\__| \_____|\___|_| |_|\___|_| \__,_|\__\___/|_|
 """
    print(Panel(title, expand=False, box=box.HORIZONTALS))
    table = Table(show_header =False, show_edge=False )
    table.add_column("Prop", justify="right", style="white", no_wrap=True)
    table.add_column("Value", style="white")
    table.add_row("Version", __version__)
    table.add_row("Config", args.config)
    print(table)
    print('')
    numeric_level = getattr(logging, args.loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        # BUGFIX: an unknown loglevel used to resolve to None and was then
        # silently ignored by basicConfig; fail fast with a clear message.
        sys.exit("Invalid loglevel: %s" % args.loglevel)
    logging.basicConfig(filename=args.logfile,
                        filemode='w',
                        level=numeric_level,
                        format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%Y-%m-%d %I:%M:%S')
    logging.info(title)
    dm = Model()
    #FIXME: use the boilerplate mechanism to create objects via cli: dvgen add hub my_hub /path/tofile
    #with open('hub_boiler.yaml', 'w') as the_file:
    #    the_file.write(dm.get_boilerplate('hub'))
    dm.load_config(args.config)
    if args.runhooks:
        dm.run_model_hooks('pre_hooks')
    if args.validateschemaonly:
        errorcount = dm.validate_entities_schemas()
        print(errorcount, "Errors found during validation of entity-schemas")
        sys.exit(0)
    dm.load_entities()
    dm.load_mappings()
    if not args.novalidate:
        # Entities are validated first; mappings reference entities, so a
        # broken entity set makes mapping validation pointless.
        errors = dm.validate_entities()
        if errors.count != 0:
            print(errors.count, "Errors found during validation of Entities")
            sys.exit(2)
        errors = dm.validate_mappings()
        if errors.count != 0:
            print(errors.count, "Errors found during validation of Mappings")
            sys.exit(2)
    if not args.validateonly:
        dm.build_dag()
        dm.capture_changes_before()
        dm.render_entity_templates()
        dm.render_model_templates()
        if args.snapshot:
            dm.create_snapshot('snapshot.yaml')
        #FIXME: when a model template is unavailable it shows up in the log but
        # not on the console, and the following lines are not executed
        dm.capture_changes_after()
        if args.synch or args.fullsynch:
            dm.synch(args.fullsynch)
        print('')
        dm.display_changes()
        print('')
    if args.runhooks:
        dm.run_model_hooks('post_hooks')


if __name__ == "__main__":
    main()
Binary file not shown.
+286
View File
@@ -0,0 +1,286 @@
type: config
name: Configuration
displayname: Configuration
namespace: model
schema:
# ------------------------------------------------------------------------------------------------
# Model
# ------------------------------------------------------------------------------------------------
model:
type: dict
schema:
name:
type: string
paths:
type: dict
schema:
log:
type: string
required: True
entities:
type: string
required: True
mappings:
type: string
required: True
templates:
type: string
required: True
output:
type: string
required: True
ignore_file_prefix:
type: string
# ------------------------------------------------------------------------------------------------
# Variables
# ------------------------------------------------------------------------------------------------
vars:
type: dict
# ------------------------------------------------------------------------------------------------
# Hooks
# ------------------------------------------------------------------------------------------------
pre_hooks:
type: dict
valuesrules:
type: list
post_hooks:
type: dict
valuesrules:
type: list
# ------------------------------------------------------------------------------------------------
# Key Definition
# ------------------------------------------------------------------------------------------------
keyattribute:
type: dict
required: True
schema:
type:
type: string
required: True
role:
type: string
required: True
mandatory:
type: boolean
ghost:
type: string
zerokey:
type: string
# ------------------------------------------------------------------------------------------------
# Modelling Constraints
# ------------------------------------------------------------------------------------------------
constraints:
type: dict
schema:
enforce_bk_type:
type: [string, list]
# ------------------------------------------------------------------------------------------------
# HASH Definition
# ------------------------------------------------------------------------------------------------
hash_algorithm:
type: string
required: true
hash_separator:
type: string
required: true
hash_case:
type: string
required: True
allowed: ['upper', 'lower', 'keep']
# ------------------------------------------------------------------------------------------------
# Business Key Treatment
# ------------------------------------------------------------------------------------------------
business_key_treatment:
type: dict
required: True
schema:
trim:
type: string
required: True
allowed: ['left', 'right', 'both']
case:
type: string
required: True
allowed: ['upper', 'lower', 'keep']
# ------------------------------------------------------------------------------------------------
# Hashdiff Attribute Treatment
# ------------------------------------------------------------------------------------------------
hashdiff_attribute_treatment:
type: dict
required: True
schema:
trim:
type: string
required: True
allowed: ['left', 'right', 'both']
case:
type: string
required: True
allowed: ['upper', 'lower', 'keep']
# ------------------------------------------------------------------------------------------------
# Common Attributes
# ------------------------------------------------------------------------------------------------
commonattributes:
type: dict
valuesrules:
type: dict
schema:
name:
type: string
required: True
type:
type: string
required: True
mandatory:
type: boolean
ghost:
type: string
# ------------------------------------------------------------------------------------------------
# Ghost-records
# ------------------------------------------------------------------------------------------------
ghostrecord:
type: dict
valuesrules:
type: string
# ------------------------------------------------------------------------------------------------
# Layer
# ------------------------------------------------------------------------------------------------
layer:
type: dict
valuesrules:
type: dict
schema:
name:
type: string
required: True
description:
type: string
connectionname:
type: string
sys_specification:
type: string
defaultdatabaseobject:
type: dict
schema:
database:
type: string
schema:
type: string
filegroup:
type: string
properties:
type: dict
# ------------------------------------------------------------------------------------------------
# Entity Defaults
# ------------------------------------------------------------------------------------------------
entitydefaults:
type: dict
valuesrules:
type: dict
schema:
layer:
type: string
required: True
attributes:
type: list
schema:
type: string
attribute_role:
type: string
map_implicit_roles:
type: list
schema:
type: string
extra:
type: dict
# ------------------------------------------------------------------------------------------------
# Generator config
# ------------------------------------------------------------------------------------------------
generator:
type: dict
valuesrules:
type: dict
schema:
templates:
type: list
schema:
type: dict
schema:
subtype:
type: [string,list]
template:
type: string
required: True
targetfolder:
type: string
required: True
filename:
type: string
required: True
lang:
type: string
required: True
synchtarget:
type: string
# ------------------------------------------------------------------------------------------------
# Base templates
# ------------------------------------------------------------------------------------------------
templates:
type: dict
schema:
column_ddl:
type: string
required: True
table_qualifier:
type: string
required: True
attribute_expression:
type: string
required: True
entity_key_name:
type: string
required: True
query_entity_alias:
type: string
required: True
# ------------------------------------------------------------------------------------------------
# Template Engine
# ------------------------------------------------------------------------------------------------
jinja:
type: dict
# ------------------------------------------------------------------------------------------------
# sys_specifications
# ------------------------------------------------------------------------------------------------
sys_specification:
type: dict
@@ -0,0 +1,52 @@
type: bridge
name: bridge
displayname: Bridge Table
namespace: model
schema:
# GeneratorEntity
name:
type: string
required: True
type:
type: string
required: True
subtype:
type: string
allowed: [base]
generate:
type: integer
allowed: [0,1]
extra:
type: [list, dict, string, integer]
description:
type: string
sql_pre_hook:
type: string
sql_post_hook:
type: string
# DataVaultEntity
layer:
type: string
dbentity:
schema: dbentity
exclude_commonattributes:
type: list
# Specific
updatemode:
type: string
allowed: [full, merge, append, custom]
snapshotattribute:
type: dict
schema: attribute
snapshotquery:
type: string
bridgeattributes: attributes
hubs:
type: list
schema:
type: string
links:
type: list
schema:
type: string
@@ -0,0 +1,49 @@
type: composite
name: composite
displayname: Composite
namespace: model
schema:
# Generator Entity:
name:
type: string
required: True
type:
type: string
required: True
subtype:
type: string
generate:
type: integer
allowed: [0,1]
extra:
type: [list, dict, string, integer]
description:
type: string
sql_pre_hook:
type: string
sql_post_hook:
type: string
# DataVaultEntity
layer:
type: string
dbentity:
type: dict
schema:
name:
type: string
filegroup:
type: string
database:
type: string
schema:
type: string
properties:
type: dict
exclude_commonattributes:
type: list
# Attributes:
attributes: attributes
# Specific:
query:
type: string
required: True
@@ -0,0 +1,60 @@
type: delivery
name: delivery
displayname: Delivery
namespace: model
schema:
# Generator Entity:
name:
type: string
required: True
type:
type: string
required: True
subtype:
type: string
generate:
type: integer
allowed: [0,1]
extra:
type: [list, dict, string, integer]
description:
type: string
sql_pre_hook:
type: string
sql_post_hook:
type: string
# DataVaultEntity
layer:
type: string
dbentity:
schema: dbentity
exclude_commonattributes:
type: list
# Specific:
recordsource:
type: string
batchmode:
type: string
deltaattribute:
type: string
deltainitialvalue:
type: string
query:
type: string
interfaces:
type: list
required: True
schema:
type: string
sourcesystem:
type: string
sourcetype:
type: string
ldts_source:
type: string
properties:
type: dict
# Attributes:
attributes: attributes
@@ -0,0 +1,37 @@
type: generictable
name: generictable
displayname: Generic Table
namespace: model
schema:
# Generator Entity:
name:
type: string
required: True
type:
type: string
required: True
subtype:
type: string
generate:
type: integer
allowed: [0,1]
extra:
type: [list, dict, string, integer]
description:
type: string
sql_pre_hook:
type: string
sql_post_hook:
type: string
# DataVaultEntity
layer:
type: string
dbentity:
schema: dbentity
exclude_commonattributes:
type: list
# Specific:
# Attributes:
attributes: attributes
@@ -0,0 +1,39 @@
type: generictask
name: generictask
displayname: Generic Task
namespace: model
schema:
# Generator Entity:
name:
type: string
required: True
type:
type: string
required: True
subtype:
type: string
required: True
generate:
type: integer
allowed: [0,1]
extra:
type: [list, dict, string, integer]
description:
type: string
sql_pre_hook:
type: string
sql_post_hook:
type: string
# DataVaultEntity
layer:
type: string
dbentity:
schema: dbentity
# Specific:
sources:
type: list
required: True
targets:
type: list
required: True
@@ -0,0 +1,41 @@
type: generictransformation
name: generictransformation
displayname: Generic Transformation
namespace: model
schema:
# Generator Entity:
name:
type: string
required: True
type:
type: string
required: True
subtype:
type: string
generate:
type: integer
allowed: [0,1]
extra:
type: [list, dict, string, integer]
description:
type: string
sql_pre_hook:
type: string
sql_post_hook:
type: string
# DataVaultEntity
layer:
type: string
dbentity:
schema: dbentity
# Specific:
query:
type: string
required: True
sources:
type: list
required: True
targets:
type: list
required: True
@@ -0,0 +1,59 @@
type: hub
name: Hub
displayname: Hub
namespace: model
schema:
# GeneratorEntity
name:
type: string
required: True
type:
type: string
required: True
subtype:
type: string
generate:
type: integer
allowed: [0,1]
extra:
type: [list, dict, string, integer]
description:
type: string
sql_pre_hook:
type: string
sql_post_hook:
type: string
# DataVaultEntity
layer:
type: string
dbentity:
schema: dbentity
# Specific
key:
type: string
key_treatment:
type: dict
schema:
trim:
type: string
allowed: ['left', 'right', 'both']
case:
type: string
allowed: ['upper', 'lower', 'keep']
roleof:
type: string
    caseSesitive: # NOTE(review): likely a typo for 'caseSensitive', but renaming the key would break existing model files — confirm before changing
type: integer
allowed: [0,1]
# Attributes:
attributes: attributes
boilerplate: |
name: {unique_name}
type: hub
key: primary_key_name # Hashkey
description: 'optional description'
attributes:
- {name: 'businesskey1', type: 'nvarchar(200)'}
@@ -0,0 +1,52 @@
type: link
name: Link
displayname: Link
namespace: model
schema:
# GeneratorEntity
name:
type: string
required: True
type:
type: string
required: True
subtype:
type: string
generate:
type: integer
allowed: [0,1]
extra:
type: [list, dict, string, integer]
description:
type: string
sql_pre_hook:
type: string
sql_post_hook:
type: string
# DataVaultEntity
layer:
type: string
dbentity:
schema: dbentity
exclude_commonattributes:
type: list
# Specific
key:
type: string
hubs:
type: list
required: True
schema:
type: string
links:
type: list
schema:
type: string
drivingkeys:
type: list
schema:
type: string
# Attributes:
attributes: attributes
@@ -0,0 +1,61 @@
type: pit
name: pit
displayname: Point in Time Table
namespace: model
schema:
# GeneratorEntity
name:
type: string
required: True
type:
type: string
required: True
subtype:
type: string
allowed: [base]
generate:
type: integer
allowed: [0,1]
extra:
type: [list, dict, string, integer]
description:
type: string
sql_pre_hook:
type: string
sql_post_hook:
type: string
# DataVaultEntity
layer:
type: string
dbentity:
schema: dbentity
exclude_commonattributes:
type: list
# Specific
snapshotmode:
type: string
required: True
allowed: [latest, snapshotquery, full, snapshottable]
baseentity:
type: string
required: True
satellites:
type: list
required: True
schema:
type: string
snapshotattribute:
type: dict
schema: attribute
snapshottable:
type: string
snapshottableattribute:
type: string
snapshotquery:
type: string
pitattributes:
type: list
schema:
type: list
query:
type: string
@@ -0,0 +1,41 @@
type: reference
name: reference
displayname: Reference Table
namespace: model
schema:
# GeneratorEntity
name:
type: string
required: True
type:
type: string
required: True
subtype:
type: string
generate:
type: integer
allowed: [0,1]
extra:
type: [list, dict, string, integer]
description:
type: string
sql_pre_hook:
type: string
sql_post_hook:
type: string
# DataVaultEntity
layer:
type: string
dbentity:
schema: dbentity
exclude_commonattributes:
type: list
# Specific
data:
type: list
schema:
type: list
query:
type: string
# Attributes:
attributes: attributes
@@ -0,0 +1,6 @@
type: report
name: report
displayname: Report
namespace: reporting
schema:
@@ -0,0 +1,48 @@
type: satellite
name: Satellite
displayname: Satellite
namespace: model
schema:
# GeneratorEntity
name:
type: string
required: True
type:
type: string
required: True
subtype:
type: string
allowed: [base,drivingkeystatus]
generate:
type: integer
allowed: [0,1]
extra:
type: [list, dict, string, integer]
description:
type: string
sql_pre_hook:
type: string
sql_post_hook:
type: string
# DataVaultEntity
layer:
type: string
dbentity:
schema: dbentity
exclude_commonattributes:
type: list
# Specific
parent:
type: string
required: True
hashdiff_attribute_treatment:
type: dict
schema:
trim:
type: string
allowed: ['left', 'right', 'both']
case:
type: string
allowed: ['upper', 'lower', 'keep']
# Attributes:
attributes: attributes
@@ -0,0 +1,26 @@
type: source
name: source
displayname: Source interface
namespace: source
schema:
# Generator Entity:
name:
type: string
required: True
type:
type: string
required: True
description:
type: string
# Specific:
sourcetype:
type: string
sourcesystem:
type: string
dbentity:
schema: dbentity
properties:
type: dict
# Attributes:
attributes: attributes
@@ -0,0 +1,23 @@
type: sourcesystem
name: sourcesystem
displayname: Source System
namespace: sourcesystem
schema:
# Generator Entity:
name:
type: string
required: True
type:
type: string
required: True
description:
type: string
# Specific:
shortname:
type: string
connectionname:
type: string
sourcesystemtype:
type: string
sys_specification:
type: string
@@ -0,0 +1,29 @@
type: subdag
name: subdag
displayname: Sub Dag
namespace: dag
schema:
# Generator Entity:
name:
type: string
required: True
type:
type: string
required: True
subtype:
type: string
generate:
type: integer
allowed: [0,1]
extra:
type: [list, dict, string, integer]
description:
type: string
# Specific:
entrypoints:
type: list
excludes:
type: list
key:
type: string
@@ -0,0 +1,59 @@
type: view
name: view
displayname: View
namespace: model
schema:
# Generator Entity:
name:
type: string
required: True
type:
type: string
required: True
subtype:
type: string
generate:
type: integer
allowed: [0,1]
extra:
type: [list, dict, string, integer]
description:
type: string
sql_pre_hook:
type: string
sql_post_hook:
type: string
# DataVaultEntity
layer:
type: string
dbentity:
schema: dbentity
exclude_commonattributes:
type: list
# Attributes:
attributes: attributes
# Specific:
query:
type: string
required: True
materialize:
type: integer
allowed: [0,1]
materialization:
type: dict
schema:
mode:
type: string
allowed: ['merge', 'full']
target:
type: string
layer:
type: string
mergekeys:
type: list
schema:
type: string
query:
type: string
+34
View File
@@ -0,0 +1,34 @@
type: mapping
name: mapping
displayname: Mapping
schema:
  root: # workaround: cerberus does not support dynamic root keys
type: dict
valuesrules:
type: list
schema:
type: dict
schema:
target:
type: string
required: True
mappingmode:
type: string
allowed: ['explicit', 'implicit']
type:
type: string
allowed: ['mappingonly', 'master']
mapping:
type: list
schema:
type: list
schema:
type: [string, dict]
schema:
expression:
type: string
required: True
resulttype:
type: string
+5
View File
@@ -0,0 +1,5 @@
type: model
name: model
displayname: Model
schema:
@@ -0,0 +1,42 @@
name: attribute
type: schema
schema:
name:
type: string
required: True
type:
type: string
required: True
mandatory:
type: boolean
default:
type: string
description:
type: string
ghost:
type: string
role:
type: string
precision:
type: [integer, string]
scale:
type: [integer, string]
length:
type: [integer, string]
order:
type: integer
pii:
type: boolean
props:
type: dict
# View related
reference:
type: string
referencetype:
type: string
components:
type: list
schema:
type: string
@@ -0,0 +1,9 @@
name: attributes
type: ruleset
schema:
type: list
schema:
type: dict
schema: attribute
@@ -0,0 +1,13 @@
name: dbentity
type: schema
schema:
name:
type: string
filegroup:
type: string
database:
type: string
schema:
type: string
properties:
type: dict
@@ -0,0 +1,19 @@
type: sys_specification
name: sys_specification
displayname: System Specification
namespace: lang
schema:
sys_specification:
type: string
required: true
objectnames:
type: string
datatypes:
type: dict
valuesrules:
type: dict
schema:
pattern:
type: string
required: True