This repository has been archived on 2026-03-20. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
datavaultgenerator-1.1.5/DataVaultGenerator/Model.py

924 lines
37 KiB
Python

#from DataVaultGenerator.Entities.Derived import Derived
from tokenize import String
from DataVaultGenerator.Entities.SubDag import SubDag
import logging
import glob
import time
from pathlib import Path, PurePath
from shutil import Error, copy2
from rich.progress import Progress
from rich.progress import TextColumn, BarColumn, SpinnerColumn, TaskProgressColumn, TimeElapsedColumn
from rich.tree import Tree
from rich.panel import Panel
from rich.table import Table
from rich.rule import Rule
from rich import box
from rich.style import Style
from rich import print
import yaml
try:
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader
import sys
import hashlib
import re
import os
import subprocess
from os import makedirs
from cerberus import Validator, schema_registry, rules_set_registry
from jinja2 import Environment, FileSystemLoader, TemplateNotFound, UndefinedError
from datetime import datetime
from DataVaultGenerator.Config import Config
from DataVaultGenerator.Components import ErrorCollection
from DataVaultGenerator.Components import Layer, log
from DataVaultGenerator.Entities.Composite import Composite
from DataVaultGenerator.Entities.Delivery import Delivery
from DataVaultGenerator.Entities.GenericTable import GenericTable
from DataVaultGenerator.Entities.GenericTransformation import GenericTransformation
from DataVaultGenerator.Entities.GenericTask import GenericTask
from DataVaultGenerator.Entities.Hub import Hub
from DataVaultGenerator.Entities.Interface import Interface
from DataVaultGenerator.Entities.Link import Link
from DataVaultGenerator.Entities.PIT import PIT
from DataVaultGenerator.Entities.Bridge import Bridge
from DataVaultGenerator.Entities.Reference import Reference
from DataVaultGenerator.Entities.Report import Report
from DataVaultGenerator.Entities.Satellite import Satellite
from DataVaultGenerator.Entities.Sourcesystem import SourceSystem
from DataVaultGenerator.Entities.View import View
from DataVaultGenerator.Entities.SubDag import SubDag
from DataVaultGenerator.Mapping import Mapping
from DataVaultGenerator.Config import ConfigDict
from DataVaultGenerator.Dag import Dag,DagNode
from DataVaultGenerator import __version__
class Model:
    """Central in-memory representation of a DataVaultGenerator model.

    Holds the config, all loaded entity registries (entities, source systems,
    interfaces, sub-dags, layers, mappings), the dependency DAG and the
    change-detection (cdc) state used when rendering output files.
    """
    def __init__(self):
        logging.info('Init Model')
        self.config = Config()
        self.entities = {}          # model-namespace entities, keyed by name
        self.mappings = {}          # source entity name -> Mapping
        self.sourcesystems = {}     # sourcesystem-namespace entities, keyed by name
        self.subdags = {}           # dag-namespace entities, keyed by name
        self.interfaces = {}        # source-namespace entities, keyed by name
        self.layer = {}             # layer id -> Layer
        self.basetemplates = {}     # template key -> loaded jinja template
        self.cdc = {}               # output file -> change-detection record
        self.types = {}             # entity-type definitions loaded from yaml catalogs
        self.dag = Dag(self)
        self.sys_specifications = {}
        # load the static catalogs before any entity file is read
        self.load_schema()
        self.load_types()
        self.validator = Validator(allow_unknown=False)
@property
def name(self):
return self.config.model.name
def load_schema(self):
modpath = os.path.dirname(__file__)
for f in glob.glob(os.path.join(modpath, "schema/registry/*.yaml"), recursive=True):
with open(f, 'r') as file:
definition = yaml.load(file, Loader=Loader)
if definition.get('type') == 'schema':
schema_registry.add(definition.get('name'),definition.get('schema'))
else:
rules_set_registry.add(definition.get('name'),definition.get('schema'))
def load_types(self):
modpath = os.path.dirname(__file__)
for f in glob.glob(os.path.join(modpath, "schema/entities/*.yaml"), recursive=False):
with open(f, 'r') as file:
definition = yaml.load(file, Loader=Loader)
self.types[definition.get('type')] = definition
for f in glob.glob(os.path.join(modpath, "schema/*.yaml"), recursive=False):
with open(f, 'r') as file:
definition = yaml.load(file, Loader=Loader)
self.types[definition.get('type')] = definition
def get_types(self):
return self.types
def get_type_property(self, type, property):
return self.types.get(type,{}).get(property,'')
def get_boilerplate(self,type):
return self.get_type_property(type,'boilerplate')
def get_subtypes(self):
return {'base': {'displayname': 'Base/Default'},
'drivingkeystatus': {'displayname': 'Status-Satellite for Driving Key'},
'fact': {'displayname': 'Fact Table/View'},
'dimension': {'displayname': 'Dimension Table/View'},
'': {'displayname': 'n/a'}
}
def get_type_displayname(self, entity_type):
return self.get_types().get(entity_type, {}).get('displayname', 'Unknown Type: ' + entity_type)
def get_entities(self, generatable_only: bool = True ):
if generatable_only:
return {k:v for k, v in self.entities.items() if v.generate == 1}
else:
return self.entities
def get_entities_by_type(self, entity_type: str, generatable_only: bool = True):
if generatable_only:
return [e for e in self.entities.values() if e.type == entity_type and e.generate == 1]
else:
return [e for e in self.entities.values() if e.type == entity_type]
def get_entity(self, name: str):
return self.entities.get(name)
def get_layers(self):
return self.layer
def get_layer(self, name: str):
return self.layer.get(name)
def get_source_systems(self):
return self.sourcesystems
def get_source_system(self, name: str):
return self.sourcesystems.get(name)
def get_subdags(self):
return self.subdags
def get_subdag(self, name: str):
return self.subdags.get(name)
def get_interfaces(self):
return self.interfaces
def get_interface(self, name: str):
return self.interfaces.get(name)
def get_interface_by_source_system(self, sourcesystem):
return [i for i in self.interfaces.values() if i.source_system == sourcesystem]
def get_mapping(self, name: str):
"""return a dict of mapping by Source."""
return self.mappings.get(name)
def get_mappings(self):
"""return a dict of mappings."""
return self.mappings
    def load_config(self, filename):
        """Load the generator configuration and initialize all config-derived state.

        Side effects: validates and loads the config, builds the jinja template
        environment (with generator_version / now / unfolded var globals),
        instantiates Layer objects, resolves the configured base templates and
        reads the sys_specification yaml files. Exits the process (code 2) on
        any failure.
        """
        logging.info('reading config from %s', filename)
        self.config.load(filename, self.get_type_property('config','schema'), self.validate_definition)
        # template environment rooted at the configured templates folder;
        # extra jinja options come straight from config.jinja.environment
        self.templateEnvironment = Environment(
            loader=FileSystemLoader(self.config.path.joinpath(self.config.paths.templates)),
            **self.config.jinja.environment
        )
        self.templateEnvironment.globals['generator_version'] = __version__
        self.templateEnvironment.globals['now'] = datetime.now
        # unfold vars: each config var k is exposed to templates as global _k_
        for k,v in self.config.vars.items():
            self.templateEnvironment.globals['_'+k+'_'] = v
        for layerid, layerdefinition in self.config.layer.items():
            self.layer[layerid] = Layer(self, layerid, layerdefinition)
        # base templates: the configured value is itself a jinja template that is
        # rendered (against the model) to get the actual template filename.
        # NOTE(review): this loop rebinds the parameter name `filename`.
        for templatekey, filename in self.config.basetemplates.items():
            try:
                templatefilename = self.templateEnvironment.from_string(filename).render(model=self)
                self.basetemplates[templatekey] = self.templateEnvironment.get_template(templatefilename)
            except TemplateNotFound:
                print(f"Config: Base-Template {templatefilename} not found.")
                logging.error(f"Config: Base-Template {templatefilename} not found.")
                sys.exit(2)
        # sys_specifications: each entry points at a yaml file that must satisfy
        # the 'sys_specification' schema before it is kept.
        for k,v in self.config.sys_specification.items():
            folder = self.config.path.joinpath(v)
            try:
                with open(folder, 'r') as file:
                    specyaml = yaml.load(file, Loader=Loader)
                    is_valid = self.validate_definition('sys_specification definition', self.get_type_property('sys_specification','schema'), specyaml)
                    if is_valid:
                        self.sys_specifications[k] = specyaml
                    else:
                        print(f"Config: sys_specification definition {folder} is not valid.")
                        logging.error(f"Config: sys_specification definition {folder} is not valid.")
                        sys.exit(2)
            except FileNotFoundError as e:
                print(f"Config: sys_specification {folder} not found.")
                logging.error(e)
                sys.exit(2)
            except yaml.scanner.ScannerError as e:
                print("")
                logging.error(e)
                sys.exit(2)
def get_config(self):
return self.config
def get_file_content(self, filename: str):
with open(filename, 'r') as file:
return file.read()
def save_file_content(self, filename: str, content):
print('Saving content to file: ' + filename)
with open(filename, 'w') as file:
file.write(content)
def create_entity(self, f, entityyml):
entitytype = entityyml.get('type')
#FIXME: einen Prozessschritt eher auswerten
schema = self.get_type_property(entitytype,'schema')
if schema:
self.validate_definition(entityyml.get('type') + ': ' + entityyml.get('name'), schema, entityyml)
classmap = {
'hub': Hub
,'delivery': Delivery
,'satellite':Satellite
,'link':Link
,'view':View
,'pit':PIT
,'bridge':Bridge
,'reference':Reference
,'sourcesystem':SourceSystem
,'source':Interface
,'generictable':GenericTable
,'generictransformation':GenericTransformation
,'generictask':GenericTask
,'report': Report
,'composite': Composite
,'subdag': SubDag
}
if entitytype in classmap:
return classmap[entitytype](self, f, entityyml)
else:
return None
def add_entity(self, entity):
"""adds a new entitity. If an entity with the same name exists, it wont work."""
if entity.name not in self.entities:
self.entities[entity.name] = entity
else:
logging.error('entity %s already exists in model', entity.name)
#if entity.type in ('delivery'):
# derived = Derived(self,'',entity,{'derived_from_type': 'delivery'})
# self.entities[derived.name] = derived
def update_entity(self, entity):
"""replace an existing entity with a new one."""
self.entities[entity.name] = entity
def load_entity_from_file(self, filename):
pass
def validate_definition(self, title, schema, definition, allow_unknown=False, failonerror=True):
self.validator.allow_unknown = allow_unknown
if not self.validator.validate(definition, schema):
tree = Tree(title)
for field, errors in self.validator.errors.items():
for e in errors:
fieldtree = tree.add(field)
if type(e) is dict:
for itemno, itemerrors in e.items():
itemtree = fieldtree.add('item' + str(itemno))
#print(' |-- item ', itemno, ':')
for itemerror in itemerrors:
if type(itemerror) is dict:
for fieldname, fielderror in itemerror.items():
itemtree.add(fieldname + ': ' + str(fielderror))
#print(' |-- ', fieldname, ': ', fielderror)
else:
itemtree.add(str(itemerror))
#print(' |-- ', itemerror)
else:
fieldtree.add(str(e))
print(Panel(tree, title="[red]SCHEMA ERROR", expand=False,padding=1 ))
logging.error(self.validator.errors)
if failonerror:
sys.exit(2)
return False
return True
    def validate_entities_schemas(self):
        """Schema-check every entity yaml file and return the number of invalid files.

        Unlike load_entities this is a pure lint pass: schema violations are
        counted (not fatal); only unreadable/undecodable yaml aborts the process.
        NOTE(review): `folder` is concatenated with '+' below, which assumes
        config.paths.entities is a plain str (not a Path) -- confirm.
        """
        errorcount = 0
        folder = self.config.paths.entities
        for f in glob.glob(folder + "/**/*.yaml", recursive=True):
            try:
                with open(f, 'r') as file:
                    entityyml = yaml.load(file, Loader=Loader)
                    # minimal base check first: a name plus a known type
                    baseschema = {'name': {'type': 'string', 'required': True}
                        , 'type': {'required': True,
                                   'allowed': list(self.get_types().keys())}
                        }
                    valid_base = self.validate_definition(f, baseschema, entityyml, True, False)
                    if valid_base:
                        # then the full type-specific schema, when one is registered
                        entitytype = entityyml.get('type')
                        schema = self.get_type_property(entitytype, 'schema')
                        if schema:
                            valid = self.validate_definition(entityyml.get('type') + ': ' + entityyml.get('name'),
                                                             schema, entityyml, False, False)
                            if not valid:
                                errorcount += 1
                    else:
                        errorcount += 1
            except (yaml.scanner.ScannerError, UnicodeDecodeError) as e:
                print("")
                logging.error(e)
                sys.exit(2)
        return errorcount
    def load_entities(self):
        """Read every entity yaml under paths.entities and register each entity
        in the registry matching its type's namespace.

        Namespaces: 'model' -> self.entities (via add_entity), 'sourcesystem'
        -> self.sourcesystems, 'source' -> self.interfaces, 'dag' ->
        self.subdags. Files starting with the configured ignore prefix are
        skipped. Exits the process on unreadable yaml, empty documents or
        base-schema violations.
        """
        folder = self.config.path.joinpath(self.config.paths.entities)
        logging.info('reading entities from: %s', folder)
        # base schema applied to every file before type-specific validation
        baseschema = {'name': {'type': 'string', 'required': True}
            , 'type': {'required': True,
                       'allowed': list(self.get_types().keys())}
            , 'description': {'type': 'string'}
            , 'subtype': {'type': 'string'}
            , 'generate': {'type': 'integer', 'allowed': [0, 1]}
            }
        numfiles = len(list(folder.glob('**/*.yaml')))
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(style=Style(color="green")),
            TimeElapsedColumn()
        ) as progress:
            task1 = progress.add_task("[blue]Loading: ", total=numfiles)
            for f in sorted(folder.glob('**/*.yaml')):
                if f.name.startswith(self.config.model.ignore_file_prefix):
                    logging.info('ignore file because of prefix: %s', f.relative_to(folder))
                    continue
                logging.info('reading entity: %s', f.relative_to(folder))
                try:
                    with open(f, 'r') as file:
                        entityyml = yaml.load(file, Loader=Loader)
                        if not entityyml:
                            print("")
                            logging.error('document empty: ' + f.name)
                            print('document empty: ', f.name)
                            sys.exit(2)
                        self.validate_definition(f.name, baseschema, entityyml, True)
                except (yaml.scanner.ScannerError, UnicodeDecodeError) as e:
                    print("")
                    logging.error(e)
                    sys.exit(2)
                # dispatch on the namespace declared for the entity's type
                entitytype = entityyml.get('type')
                namespace = self.get_type_property(entitytype, 'namespace')
                if namespace == 'model':
                    self.add_entity(self.create_entity(f, entityyml))
                if namespace == 'sourcesystem':
                    entityname = entityyml.get('name')
                    # IMPROVE: implementation inconsistent with entities -> self.add_entity
                    self.sourcesystems[entityname] = self.create_entity(f, entityyml)
                if namespace == 'source':
                    entityname = entityyml.get('name')
                    # IMPROVE: implementation inconsistent with entities -> self.add_entity
                    self.interfaces[entityname] = self.create_entity(f, entityyml)
                if namespace == 'dag':
                    entityname = entityyml.get('name')
                    # IMPROVE: implementation inconsistent with entities -> self.add_entity
                    self.subdags[entityname] = self.create_entity(f, entityyml)
                # NOTE(review): types whose namespace is none of the above are
                # read and validated but silently dropped here -- confirm intended.
                progress.update(task1, advance=1)
def load_mappings(self):
folder = self.config.path.joinpath(self.config.paths.mappings)
logging.info('reading mappings from %s', folder)
schema = self.get_type_property('mapping', 'schema')
for f in sorted(folder.glob('**/*.yaml')):
logging.info('reading mapping: %s', f.relative_to(folder))
try:
with open(f, 'r') as file:
mappingyml = yaml.load(file, Loader=Loader)
self.validate_definition('Mapping: ' + f.name, schema, {'root': mappingyml})
for sourceentityname in mappingyml.keys():
self.mappings[sourceentityname] = Mapping(self, sourceentityname, f.name, mappingyml[sourceentityname])
except yaml.scanner.ScannerError as e:
print("")
logging.error(e)
sys.exit(2)
def validate_mappings(self):
errors = ErrorCollection()
mapping_count = len(self.mappings)
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeElapsedColumn(),
) as progress:
task1 = progress.add_task("[blue]Validating Mappings: ", total=mapping_count)
for m in self.get_mappings().values():
errors.append(m.validate())
progress.update(task1, advance=1)
for rm in errors.errors: # TODO: merge message-paths
log(logging.ERROR, rm.get('title'), rm.get('path'), rm.get('message'), True)
if errors.count != 0:
logging.error('%i errors found while validating mappings.', errors.count)
return errors
def validate_entities(self):
errors = ErrorCollection()
entity_count = len(self.entities) + len(self.interfaces) + len(self.subdags)
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeElapsedColumn(),
) as progress:
task1 = progress.add_task("[blue]Validating Entities: ", total=entity_count)
for e in self.entities.values():
progress.update(task1, advance=1)
errors.append(e.validate())
for e in self.get_interfaces().values():
progress.update(task1, advance=1)
errors.append(e.validate())
for e in self.get_subdags().values():
progress.update(task1, advance=1)
errors.append(e.validate())
#for rm in result['messages']:
# for p in rm.get('path'):
# print(p)
# print(rm.get('path'), rm.get('message'))
for rm in errors.errors: # TODO: merge message-paths
log(logging.ERROR, rm.get('title'), rm.get('path'), rm.get('message'), True)
if errors.count != 0:
logging.error('%i errors found while validating entities.', errors.count)
return errors
def build_dag(self):
logging.info('building dag... ')
#TODO: Möglichkeit eine entity im Dag zu ignorieren einbauen (auf entity-level)
entity_count = len(self.get_entities().keys())
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeElapsedColumn(),
) as progress:
task1 = progress.add_task("[blue]Building Dag: ", total=entity_count)
for i, (k,e) in enumerate(self.get_entities().items()):
progress.update(task1, advance=1)
namespace = self.get_type_property(e.type, 'namespace')
if namespace =='model':
logging.info('adding node: %s', e.name)
self.dag.add_node(DagNode(e.name,e))
for c in e.get_component_entities():
c_namespace = self.get_type_property(c.get('type'), 'namespace')
if c_namespace == 'model':
logging.info('adding edge: %s -> %s', c.get('component').name, e.name)
self.dag.add_edge((c.get('component').name, e.name))
    def render_entity_templates(self):
        """Render all per-entity templates into the output folder, with md5-based
        change detection.

        For every entity/interface/subdag whose generate flag is not 0, each
        template configured for its type (and matching subtype) is rendered.
        A file is written only when its checksum differs from the baseline in
        self.cdc (see capture_changes_before).
        """
        targetroot = self.config.path.joinpath(self.config.paths.output)
        logging.info('render entity templates to: %s', targetroot)
        entity_count = len(self.entities.keys()) +len(self.interfaces.keys()) + len(self.subdags.keys())
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            TimeElapsedColumn(),
        ) as progress:
            task1 = progress.add_task("[blue]Generating Entities: ", total=entity_count)
            # NOTE(review): the `|` union of dict item views is unordered and
            # requires the entity objects to be hashable.
            for i, (k,entity) in enumerate(self.entities.items() | self.interfaces.items() | self.subdags.items() ) :
                progress.update(task1, advance=1)
                if entity.generate == 0:
                    logging.info('skipping Entity %s (generate=0)', entity.name)
                    continue
                if self.config.generator.get(entity.type.lower()):
                    for templateconfig in self.config.generator.get(entity.type.lower()).get('templates'):
                        # NOTE(review): when the configured subtype is a string,
                        # `in` is a substring test -- an empty entity.subtype
                        # matches every template config; confirm intended.
                        if entity.subtype in templateconfig.get('subtype', 'base'):
                            targetfolder = templateconfig['targetfolder']
                            outfile = templateconfig['filename']
                            templatefilename = templateconfig['template']
                            synchtarget = templateconfig.get('synchtarget','')
                            # targetfolder, filename, template and synchtarget are
                            # themselves jinja templates rendered per entity
                            templatefilename = self.templateEnvironment.from_string(templatefilename).render(entity=entity, model=self)
                            targetfolder = self.templateEnvironment.from_string(targetfolder).render(entity=entity, model=self)
                            outfile = self.templateEnvironment.from_string(outfile).render(entity=entity, model=self)
                            synchtarget = self.templateEnvironment.from_string(synchtarget).render(entity=entity, model=self)
                            targetfolder = targetroot.joinpath(targetfolder)
                            makedirs(targetfolder, exist_ok=True)
                            filename = targetfolder.joinpath(outfile)
                            logging.info('rendering Entity %s with template "%s" to %s ...',
                                         entity.name, templatefilename, filename.relative_to(targetroot))
                            output = entity.render_template(templatefilename)
                            if output: # empty template results are not written to a file
                                checksum = hashlib.md5(bytes(output,encoding ='utf-8')).hexdigest()
                                if not self.cdc.get(filename):
                                    self.cdc[filename] = dict(changed='new')
                                self.cdc[filename]['current'] = checksum
                                if checksum != self.cdc.get(filename,{}).get('previous'): # Only write file if content has changed
                                    with open(filename, "w") as file:
                                        file.write(output)
                                    if self.cdc[filename]['changed'] != 'new':
                                        self.cdc[filename]['changed'] = 'update'
                                else:
                                    self.cdc[filename]['changed'] = 'same'
                                if synchtarget:
                                    synchtarget = self.config.path.joinpath(synchtarget)
                                    self.cdc[filename]['synchto'] = synchtarget
                            else:
                                logging.info('skipping Entity %s with template "%s" because the result is empty', entity.name,
                                             templatefilename)
    def render_model_templates(self):
        """Render the model-level templates (config.generator['model']) into the
        output folder, using the same md5-based change detection as the
        per-entity rendering. Exits the process (code 2) on missing templates
        or undefined template variables.
        """
        targetroot = self.config.path.joinpath(self.config.paths.output)
        logging.info('render model templates to: %s', targetroot)
        entity_count = len(self.config.generator['model']['templates'])
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            TimeElapsedColumn(),
        ) as progress:
            task1 = progress.add_task("[blue]Generating Model: ", total=entity_count)
            for templateconfig in self.config.generator['model']['templates']:
                progress.update(task1, advance=1)
                targetfolder = templateconfig['targetfolder']
                outfile = templateconfig['filename']
                synchtarget = templateconfig.get('synchtarget','')
                # targetfolder, filename and synchtarget are themselves jinja templates.
                # NOTE(review): the template *name* is rendered back into
                # templateconfig in place (render_entity_templates uses a local
                # variable instead) -- a second call would re-render the already
                # rendered name; confirm this method only runs once per config.
                templateconfig['template'] = self.templateEnvironment.from_string(templateconfig['template']).render(model=self)
                targetfolder = self.templateEnvironment.from_string(targetfolder).render(model=self)
                outfile = self.templateEnvironment.from_string(outfile).render(model=self)
                synchtarget = self.templateEnvironment.from_string(synchtarget).render(model=self)
                targetfolder = targetroot.joinpath(targetfolder)
                makedirs(targetfolder, exist_ok=True)
                filename = targetfolder.joinpath(outfile)
                logging.info('rendering Model to %s ...', filename.relative_to(targetroot))
                try:
                    template = self.templateEnvironment.get_template(templateconfig['template'])
                    output = template.render(
                        model=self,
                        templatename=templateconfig['template']
                    )
                    if output: # empty template results are not written to a file
                        checksum = hashlib.md5(bytes(output,encoding ='utf-8')).hexdigest()
                        if not self.cdc.get(filename):
                            self.cdc[filename] = dict(changed='new')
                        self.cdc[filename]['current'] = checksum
                        if checksum != self.cdc.get(filename,{}).get('previous'): # Only write file if content has changed
                            with open(filename, "w") as file:
                                file.write(output)
                            if self.cdc[filename]['changed'] != 'new':
                                self.cdc[filename]['changed'] = 'update'
                        else:
                            self.cdc[filename]['changed'] = 'same'
                        if synchtarget:
                            synchtarget = self.config.path.joinpath(synchtarget)
                            self.cdc[filename]['synchto'] = synchtarget
                except TemplateNotFound:
                    print("")
                    print(Panel(f"[red]Error while rendering model-templates[/red]: Template {templateconfig['template']} not found.", title="[red]RENDER ERROR", padding=1,title_align="left" ))
                    logging.error(f"Template {templateconfig['template']} not found.")
                    sys.exit(2)
                except UndefinedError as e:
                    print("")
                    logging.error(f"Error while rendering model with Template {templateconfig['template']} :")
                    logging.error(e)
                    print(f"Error while rendering model with Template {templateconfig['template']} :", e)
                    sys.exit(2)
def run_model_hooks(self, type):
# type = 'pre_hooks', 'post_hooks'
if self.config.get(type):
for hookname, hook in self.config.get(type,{}).items():
print('---------------------------------------------------------------------------------')
print('hook:', hookname)
print('---------------------------------------------------------------------------------')
result = subprocess.run(hook, capture_output=True, text=True,shell=True)
print(result.stdout)
print(result.stderr)
    def capture_changes_before(self):
        """Baseline the output folder for change detection.

        Records an md5 per existing output file in self.cdc, each starting in
        state 'deleted'; rendering later promotes entries to new/same/update,
        so anything still 'deleted' afterwards was not re-generated.
        NOTE(review): files are opened in text mode and re-encoded as UTF-8 --
        binary or non-UTF-8 output files would raise here; confirm outputs are
        always UTF-8 text.
        """
        targetroot = self.config.path.joinpath(self.config.paths.output)
        self.cdc = {}
        for f in sorted(targetroot.glob('**/*.*')):
            filename = f #.relative_to(targetroot)
            with open(f, 'r') as inputfile:
                checksum = hashlib.md5(inputfile.read().encode('UTF-8')).hexdigest()
            self.cdc[filename] = {'current': '',
                                  'previous': checksum,
                                  'changed': 'deleted' # set changed to 'deleted' - when rendering state changes to 'new', 'same', 'update' or stays 'deleted'
                                  }
        self.cdc_time= time.time()
def capture_changes_after(self):
#delete file not in output anymore
for filename, info in self.cdc.items():
if info.get('changed') == 'deleted':
filename.unlink()
#check for renamed files (compare new and deleted files)
for filename, info in self.cdc.items():
if info.get('changed') == 'deleted':
for f, i in self.cdc.items():
if i.get('changed') == 'new' and info.get('previous') == i.get('current'):
self.cdc[filename]['changed'] = 'renamed' # change 'deleted' to 'renamed'
self.cdc[filename]['newname'] = f
self.cdc[f]['changed'] = 'renametarget' # change 'new' to 'renametarget'
self.cdc[f]['oldname'] = filename
def display_changes(self):
table = Table(show_edge=False, box=box.MINIMAL)
table.add_column("State", justify="right", no_wrap=True)
table.add_column("File", style="white")
update = [f for f, i in self.cdc.items() if i.get('changed') == 'update']
new = [f for f, i in self.cdc.items() if i.get('changed') == 'new']
deleted = [f for f, i in self.cdc.items() if i.get('changed') == 'deleted']
renamed = {f:i for f, i in self.cdc.items() if i.get('changed') == 'renamed'}
for f in update:
table.add_row("[yellow]updated",str(f.relative_to(os.getcwd())))
for f in new:
table.add_row("[green]new",str(f.relative_to(os.getcwd())))
for f in deleted:
table.add_row("[red]deleted",str(f.relative_to(os.getcwd())))
for f, i in renamed.items():
table.add_row("[blue]renamed",str(f.relative_to(os.getcwd())) + " => "+ str(i.get('newname').relative_to(os.getcwd())) )
if table.row_count > 0:
print(table)
else:
print("[grey]No files changed.")
# for state in ('update','new','deleted','renamed'):
# print(state + ': ')
# for k, v in self.cdc.items():
# if v.get('changed') == state:
# if state == 'renamed':
# print(" {} => {}".format(k.relative_to(os.getcwd()), v.get('newname').relative_to(os.getcwd())))
# else:
# print(" {}".format(k.relative_to(os.getcwd())))
#
# print(' ')
def synch(self, fullsynch = False):
logging.info('Synching to targets: ...' )
if fullsynch:
items = [(k, v.get('synchto') ) for k, v in self.cdc.items() if v.get('synchto')]
else:
items = [(k, v.get('synchto') ) for k, v in self.cdc.items() if v.get('synchto') and v.get('changed') in ('update','new','renamed') ]
#i = 0
for item in items:
#i += 1
#print_progressbar(i, len(items), prefix='Synching to Target: ', suffix='Complete') #TODO: auf rich.progress umstellen
logging.info("Copy {} to {}".format(item[0].relative_to(os.getcwd()), item[1] ))
makedirs( item[1], exist_ok=True)
copy2(item[0], item[1])
def get_parsed_query(self, entity, rawquery):
""" Parses Querystrings like: Select * from {entityname1} join {entityname2}
and returns a parsed query like Select * from [database].[dbo].[entityname1]
join [database].[dbo].[entityname2] """
parsed_result = rawquery
for placeholder, queryentity in self.get_query_entities(rawquery).items():
if queryentity:
include_db = False if entity.dbentity.database == queryentity.dbentity.database else True
parsed_result = parsed_result.replace('{' + str(placeholder) + '}',
queryentity.dbentity.get_qualifier(include_db))
return parsed_result
def get_query_entities(self, rawquery):
""" Parses Querystrings like: Select * from {entityname1} join {entityname2}
and returns a list of entity instances. """
regex = r"\{(.*?)?\}"
entities = {}
matches = re.finditer(regex, rawquery, re.MULTILINE)
for matchNum, match in enumerate(matches):
for groupNum in range(0, len(match.groups())):
entities[match.group(1)] = self.get_entity(match.group(1))
return entities
def get_entity_name_suggestion(self, entity_type: list, name: str, maxdist: int = 5) -> String:
suggest = None
dist = maxdist
for e in self.entities.values():
if e.type in entity_type:
t = self.get_levenshtein_distance(name, e.name)
if t < dist:
suggest = e.name
dist = t
if t == 1:
return suggest
#print(name, e, t)
return suggest
def get_levenshtein_distance(self, word1, word2) -> int:
word2 = word2.lower()
word1 = word1.lower()
matrix = [[0 for x in range(len(word2) + 1)] for x in range(len(word1) + 1)]
for x in range(len(word1) + 1):
matrix[x][0] = x
for y in range(len(word2) + 1):
matrix[0][y] = y
for x in range(1, len(word1) + 1):
for y in range(1, len(word2) + 1):
if word1[x - 1] == word2[y - 1]:
matrix[x][y] = min(
matrix[x - 1][y] + 1,
matrix[x - 1][y - 1],
matrix[x][y - 1] + 1
)
else:
matrix[x][y] = min(
matrix[x - 1][y] + 1,
matrix[x - 1][y - 1] + 1,
matrix[x][y - 1] + 1
)
return matrix[len(word1)][len(word2)]
    def create_snapshot(self, filename):
        """Dump the raw yaml definitions (entity._definition) of all entities,
        interfaces and sub-dags into one multi-document yaml file."""
        logging.info('creating snaphot: %s', filename)
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            TimeElapsedColumn(),
        ) as progress:
            entity_count = len(self.entities.keys()) +len(self.interfaces.keys()) + len(self.subdags.keys())
            task1 = progress.add_task("[blue]Generating Snapshot: ", total=entity_count)
            with open(filename, 'w') as file:
                docs = list()
                # NOTE(review): the `|` union of dict item views is unordered and
                # requires hashable entity objects, so the document order in the
                # snapshot is not stable between runs -- confirm acceptable.
                for i, (k,entity) in enumerate(self.entities.items() | self.interfaces.items() | self.subdags.items() ):
                    docs.append(entity._definition)
                    progress.update(task1, advance=1)
                yaml.dump_all(
                    docs,
                    file,
                    default_flow_style=False,
                    explicit_start=False,
                    sort_keys=False
                )
def load_snapshot(self, filename):
documents = dict()
with open(filename, 'r') as file:
for obj in list( yaml.load_all(file, Loader=yaml.FullLoader) ):
documents[obj.get('name')] = obj
print(documents)