#from DataVaultGenerator.Entities.Derived import Derived
# NOTE(review): `String` from tokenize is an int token constant, not a type.
# It is (mis)used as a return annotation elsewhere in this file; kept here so
# the file stays importable -- prefer plain `str` in annotations.
from tokenize import String

import glob
import hashlib
import logging
import os
import re
import subprocess
import sys
import time
from datetime import datetime
from os import makedirs
from pathlib import Path, PurePath
from shutil import Error, copy2

import yaml

try:
    # prefer the fast libyaml-based loader when it is available
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader

from cerberus import Validator, rules_set_registry, schema_registry
from jinja2 import Environment, FileSystemLoader, TemplateNotFound, UndefinedError
from rich import box
from rich import print  # intentionally shadows the builtin print with rich markup support
from rich.panel import Panel
from rich.progress import (
    BarColumn,
    Progress,
    SpinnerColumn,
    TaskProgressColumn,
    TextColumn,
    TimeElapsedColumn,
)
from rich.rule import Rule
from rich.style import Style
from rich.table import Table
from rich.tree import Tree

from DataVaultGenerator import __version__
from DataVaultGenerator.Components import ErrorCollection, Layer, log
from DataVaultGenerator.Config import Config, ConfigDict
from DataVaultGenerator.Dag import Dag, DagNode
from DataVaultGenerator.Entities.Bridge import Bridge
from DataVaultGenerator.Entities.Composite import Composite
from DataVaultGenerator.Entities.Delivery import Delivery
from DataVaultGenerator.Entities.GenericTable import GenericTable
from DataVaultGenerator.Entities.GenericTask import GenericTask
from DataVaultGenerator.Entities.GenericTransformation import GenericTransformation
from DataVaultGenerator.Entities.Hub import Hub
from DataVaultGenerator.Entities.Interface import Interface
from DataVaultGenerator.Entities.Link import Link
from DataVaultGenerator.Entities.PIT import PIT
from DataVaultGenerator.Entities.Reference import Reference
from DataVaultGenerator.Entities.Report import Report
from DataVaultGenerator.Entities.Satellite import Satellite
from DataVaultGenerator.Entities.Sourcesystem import SourceSystem
from DataVaultGenerator.Entities.SubDag import SubDag  # was imported twice; deduplicated
from DataVaultGenerator.Entities.View import View
from DataVaultGenerator.Mapping import Mapping


class Model:
    """In-memory representation of the whole generator model.

    Holds the configuration, all entity/interface/mapping definitions loaded
    from YAML, the dependency DAG, the Jinja2 template environment and the
    change-data-capture (cdc) bookkeeping used to write only changed files.
    """

    def __init__(self):
        logging.info('Init Model')
        self.config = Config()
        # registries, all keyed by name
        self.entities = {}
        self.mappings = {}
        self.sourcesystems = {}
        self.subdags = {}
        self.interfaces = {}
        self.layer = {}
        self.basetemplates = {}
        # cdc: output-file path -> {'current', 'previous', 'changed', ...}
        self.cdc = {}
        self.types = {}
        self.dag = Dag(self)
        self.sys_specifications = {}
        self.load_schema()
        self.load_types()
        self.validator = Validator(allow_unknown=False)

    @property
    def name(self):
        """The model name from the loaded configuration."""
        return self.config.model.name

    def load_schema(self):
        """Register shared cerberus schema / rules-set fragments shipped with the package."""
        modpath = os.path.dirname(__file__)
        # NOTE(review): recursive=True has no effect without '**' in the pattern
        for f in glob.glob(os.path.join(modpath, "schema/registry/*.yaml"), recursive=True):
            with open(f, 'r') as file:
                definition = yaml.load(file, Loader=Loader)
                if definition.get('type') == 'schema':
                    schema_registry.add(definition.get('name'), definition.get('schema'))
                else:
                    rules_set_registry.add(definition.get('name'), definition.get('schema'))

    def load_types(self):
        """Load the built-in type definitions (entity types and top-level types) into self.types."""
        modpath = os.path.dirname(__file__)
        for f in glob.glob(os.path.join(modpath, "schema/entities/*.yaml"), recursive=False):
            with open(f, 'r') as file:
                definition = yaml.load(file, Loader=Loader)
                self.types[definition.get('type')] = definition
        for f in glob.glob(os.path.join(modpath, "schema/*.yaml"), recursive=False):
            with open(f, 'r') as file:
                definition = yaml.load(file, Loader=Loader)
                self.types[definition.get('type')] = definition

    def get_types(self):
        """Return the registry of type definitions keyed by type name."""
        return self.types

    def get_type_property(self, type, property):
        """Return *property* of the type definition for *type*, or '' when unknown.

        NOTE: parameter names shadow builtins but are kept for interface stability.
        """
        return self.types.get(type, {}).get(property, '')

    def get_boilerplate(self, type):
        """Return the boilerplate snippet configured for *type*."""
        return self.get_type_property(type, 'boilerplate')

    def get_subtypes(self):
        """Return the static registry of supported entity subtypes."""
        return {
            'base': {'displayname': 'Base/Default'},
            'drivingkeystatus': {'displayname': 'Status-Satellite for Driving Key'},
            'fact': {'displayname': 'Fact Table/View'},
            'dimension': {'displayname': 'Dimension Table/View'},
            '': {'displayname': 'n/a'},
        }
'': {'displayname': 'n/a'} } def get_type_displayname(self, entity_type): return self.get_types().get(entity_type, {}).get('displayname', 'Unknown Type: ' + entity_type) def get_entities(self, generatable_only: bool = True ): if generatable_only: return {k:v for k, v in self.entities.items() if v.generate == 1} else: return self.entities def get_entities_by_type(self, entity_type: str, generatable_only: bool = True): if generatable_only: return [e for e in self.entities.values() if e.type == entity_type and e.generate == 1] else: return [e for e in self.entities.values() if e.type == entity_type] def get_entity(self, name: str): return self.entities.get(name) def get_layers(self): return self.layer def get_layer(self, name: str): return self.layer.get(name) def get_source_systems(self): return self.sourcesystems def get_source_system(self, name: str): return self.sourcesystems.get(name) def get_subdags(self): return self.subdags def get_subdag(self, name: str): return self.subdags.get(name) def get_interfaces(self): return self.interfaces def get_interface(self, name: str): return self.interfaces.get(name) def get_interface_by_source_system(self, sourcesystem): return [i for i in self.interfaces.values() if i.source_system == sourcesystem] def get_mapping(self, name: str): """return a dict of mapping by Source.""" return self.mappings.get(name) def get_mappings(self): """return a dict of mappings.""" return self.mappings def load_config(self, filename): logging.info('reading config from %s', filename) self.config.load(filename, self.get_type_property('config','schema'), self.validate_definition) self.templateEnvironment = Environment( loader=FileSystemLoader(self.config.path.joinpath(self.config.paths.templates)), **self.config.jinja.environment ) self.templateEnvironment.globals['generator_version'] = __version__ self.templateEnvironment.globals['now'] = datetime.now #unfold vars for k,v in self.config.vars.items(): self.templateEnvironment.globals['_'+k+'_'] = v 
for layerid, layerdefinition in self.config.layer.items(): self.layer[layerid] = Layer(self, layerid, layerdefinition) for templatekey, filename in self.config.basetemplates.items(): try: templatefilename = self.templateEnvironment.from_string(filename).render(model=self) self.basetemplates[templatekey] = self.templateEnvironment.get_template(templatefilename) except TemplateNotFound: print(f"Config: Base-Template {templatefilename} not found.") logging.error(f"Config: Base-Template {templatefilename} not found.") sys.exit(2) for k,v in self.config.sys_specification.items(): folder = self.config.path.joinpath(v) try: with open(folder, 'r') as file: specyaml = yaml.load(file, Loader=Loader) is_valid = self.validate_definition('sys_specification definition', self.get_type_property('sys_specification','schema'), specyaml) if is_valid: self.sys_specifications[k] = specyaml else: print(f"Config: sys_specification definition {folder} is not valid.") logging.error(f"Config: sys_specification definition {folder} is not valid.") sys.exit(2) except FileNotFoundError as e: print(f"Config: sys_specification {folder} not found.") logging.error(e) sys.exit(2) except yaml.scanner.ScannerError as e: print("") logging.error(e) sys.exit(2) def get_config(self): return self.config def get_file_content(self, filename: str): with open(filename, 'r') as file: return file.read() def save_file_content(self, filename: str, content): print('Saving content to file: ' + filename) with open(filename, 'w') as file: file.write(content) def create_entity(self, f, entityyml): entitytype = entityyml.get('type') #FIXME: einen Prozessschritt eher auswerten schema = self.get_type_property(entitytype,'schema') if schema: self.validate_definition(entityyml.get('type') + ': ' + entityyml.get('name'), schema, entityyml) classmap = { 'hub': Hub ,'delivery': Delivery ,'satellite':Satellite ,'link':Link ,'view':View ,'pit':PIT ,'bridge':Bridge ,'reference':Reference ,'sourcesystem':SourceSystem 
,'source':Interface ,'generictable':GenericTable ,'generictransformation':GenericTransformation ,'generictask':GenericTask ,'report': Report ,'composite': Composite ,'subdag': SubDag } if entitytype in classmap: return classmap[entitytype](self, f, entityyml) else: return None def add_entity(self, entity): """adds a new entitity. If an entity with the same name exists, it wont work.""" if entity.name not in self.entities: self.entities[entity.name] = entity else: logging.error('entity %s already exists in model', entity.name) #if entity.type in ('delivery'): # derived = Derived(self,'',entity,{'derived_from_type': 'delivery'}) # self.entities[derived.name] = derived def update_entity(self, entity): """replace an existing entity with a new one.""" self.entities[entity.name] = entity def load_entity_from_file(self, filename): pass def validate_definition(self, title, schema, definition, allow_unknown=False, failonerror=True): self.validator.allow_unknown = allow_unknown if not self.validator.validate(definition, schema): tree = Tree(title) for field, errors in self.validator.errors.items(): for e in errors: fieldtree = tree.add(field) if type(e) is dict: for itemno, itemerrors in e.items(): itemtree = fieldtree.add('item' + str(itemno)) #print(' |-- item ', itemno, ':') for itemerror in itemerrors: if type(itemerror) is dict: for fieldname, fielderror in itemerror.items(): itemtree.add(fieldname + ': ' + str(fielderror)) #print(' |-- ', fieldname, ': ', fielderror) else: itemtree.add(str(itemerror)) #print(' |-- ', itemerror) else: fieldtree.add(str(e)) print(Panel(tree, title="[red]SCHEMA ERROR", expand=False,padding=1 )) logging.error(self.validator.errors) if failonerror: sys.exit(2) return False return True def validate_entities_schemas(self): errorcount = 0 folder = self.config.paths.entities for f in glob.glob(folder + "/**/*.yaml", recursive=True): try: with open(f, 'r') as file: entityyml = yaml.load(file, Loader=Loader) baseschema = {'name': {'type': 
'string', 'required': True} , 'type': {'required': True, 'allowed': list(self.get_types().keys())} } valid_base = self.validate_definition(f, baseschema, entityyml, True, False) if valid_base: entitytype = entityyml.get('type') schema = self.get_type_property(entitytype, 'schema') if schema: valid = self.validate_definition(entityyml.get('type') + ': ' + entityyml.get('name'), schema, entityyml, False, False) if not valid: errorcount += 1 else: errorcount += 1 except (yaml.scanner.ScannerError, UnicodeDecodeError) as e: print("") logging.error(e) sys.exit(2) return errorcount def load_entities(self): folder = self.config.path.joinpath(self.config.paths.entities) logging.info('reading entities from: %s', folder) baseschema = {'name': {'type': 'string', 'required': True} , 'type': {'required': True, 'allowed': list(self.get_types().keys())} , 'description': {'type': 'string'} , 'subtype': {'type': 'string'} , 'generate': {'type': 'integer', 'allowed': [0, 1]} } numfiles = len(list(folder.glob('**/*.yaml'))) #i = 0 with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), BarColumn(), TaskProgressColumn(style=Style(color="green")), TimeElapsedColumn() ) as progress: task1 = progress.add_task("[blue]Loading: ", total=numfiles) for f in sorted(folder.glob('**/*.yaml')): if f.name.startswith(self.config.model.ignore_file_prefix): logging.info('ignore file because of prefix: %s', f.relative_to(folder)) continue logging.info('reading entity: %s', f.relative_to(folder)) try: with open(f, 'r') as file: entityyml = yaml.load(file, Loader=Loader) if not entityyml: print("") logging.error('document empty: ' + f.name) print('document empty: ', f.name) sys.exit(2) self.validate_definition(f.name, baseschema, entityyml, True) except (yaml.scanner.ScannerError, UnicodeDecodeError) as e: print("") logging.error(e) sys.exit(2) entitytype = entityyml.get('type') namespace = self.get_type_property(entitytype, 'namespace') if namespace == 'model': 
self.add_entity(self.create_entity(f, entityyml)) if namespace == 'sourcesystem': entityname = entityyml.get('name') # IMPROVE: umsetzung inkonsistent zu entities -> self.add_entity self.sourcesystems[entityname] = self.create_entity(f, entityyml) if namespace == 'source': entityname = entityyml.get('name') # IMPROVE: umsetzung inkonsistent zu entities -> self.add_entity self.interfaces[entityname] = self.create_entity(f, entityyml) if namespace == 'dag': entityname = entityyml.get('name') # IMPROVE: umsetzung inkonsistent zu entities -> self.add_entity self.subdags[entityname] = self.create_entity(f, entityyml) progress.update(task1, advance=1) def load_mappings(self): folder = self.config.path.joinpath(self.config.paths.mappings) logging.info('reading mappings from %s', folder) schema = self.get_type_property('mapping', 'schema') for f in sorted(folder.glob('**/*.yaml')): logging.info('reading mapping: %s', f.relative_to(folder)) try: with open(f, 'r') as file: mappingyml = yaml.load(file, Loader=Loader) self.validate_definition('Mapping: ' + f.name, schema, {'root': mappingyml}) for sourceentityname in mappingyml.keys(): self.mappings[sourceentityname] = Mapping(self, sourceentityname, f.name, mappingyml[sourceentityname]) except yaml.scanner.ScannerError as e: print("") logging.error(e) sys.exit(2) def validate_mappings(self): errors = ErrorCollection() mapping_count = len(self.mappings) with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), BarColumn(), TaskProgressColumn(), TimeElapsedColumn(), ) as progress: task1 = progress.add_task("[blue]Validating Mappings: ", total=mapping_count) for m in self.get_mappings().values(): errors.append(m.validate()) progress.update(task1, advance=1) for rm in errors.errors: # TODO: merge message-paths log(logging.ERROR, rm.get('title'), rm.get('path'), rm.get('message'), True) if errors.count != 0: logging.error('%i errors found while validating mappings.', errors.count) return errors def 
validate_entities(self): errors = ErrorCollection() entity_count = len(self.entities) + len(self.interfaces) + len(self.subdags) with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), BarColumn(), TaskProgressColumn(), TimeElapsedColumn(), ) as progress: task1 = progress.add_task("[blue]Validating Entities: ", total=entity_count) for e in self.entities.values(): progress.update(task1, advance=1) errors.append(e.validate()) for e in self.get_interfaces().values(): progress.update(task1, advance=1) errors.append(e.validate()) for e in self.get_subdags().values(): progress.update(task1, advance=1) errors.append(e.validate()) #for rm in result['messages']: # for p in rm.get('path'): # print(p) # print(rm.get('path'), rm.get('message')) for rm in errors.errors: # TODO: merge message-paths log(logging.ERROR, rm.get('title'), rm.get('path'), rm.get('message'), True) if errors.count != 0: logging.error('%i errors found while validating entities.', errors.count) return errors def build_dag(self): logging.info('building dag... 
') #TODO: Möglichkeit eine entity im Dag zu ignorieren einbauen (auf entity-level) entity_count = len(self.get_entities().keys()) with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), BarColumn(), TaskProgressColumn(), TimeElapsedColumn(), ) as progress: task1 = progress.add_task("[blue]Building Dag: ", total=entity_count) for i, (k,e) in enumerate(self.get_entities().items()): progress.update(task1, advance=1) namespace = self.get_type_property(e.type, 'namespace') if namespace =='model': logging.info('adding node: %s', e.name) self.dag.add_node(DagNode(e.name,e)) for c in e.get_component_entities(): c_namespace = self.get_type_property(c.get('type'), 'namespace') if c_namespace == 'model': logging.info('adding edge: %s -> %s', c.get('component').name, e.name) self.dag.add_edge((c.get('component').name, e.name)) def render_entity_templates(self): targetroot = self.config.path.joinpath(self.config.paths.output) logging.info('render entity templates to: %s', targetroot) entity_count = len(self.entities.keys()) +len(self.interfaces.keys()) + len(self.subdags.keys()) with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), BarColumn(), TaskProgressColumn(), TimeElapsedColumn(), ) as progress: task1 = progress.add_task("[blue]Generating Entities: ", total=entity_count) for i, (k,entity) in enumerate(self.entities.items() | self.interfaces.items() | self.subdags.items() ) : progress.update(task1, advance=1) if entity.generate == 0: logging.info('skipping Entity %s (generate=0)', entity.name) continue if self.config.generator.get(entity.type.lower()): for templateconfig in self.config.generator.get(entity.type.lower()).get('templates'): if entity.subtype in templateconfig.get('subtype', 'base'): targetfolder = templateconfig['targetfolder'] outfile = templateconfig['filename'] templatefilename = templateconfig['template'] synchtarget = templateconfig.get('synchtarget','') #Parse targetfolder and -filename 
templates: templatefilename = self.templateEnvironment.from_string(templatefilename).render(entity=entity, model=self) targetfolder = self.templateEnvironment.from_string(targetfolder).render(entity=entity, model=self) outfile = self.templateEnvironment.from_string(outfile).render(entity=entity, model=self) synchtarget = self.templateEnvironment.from_string(synchtarget).render(entity=entity, model=self) targetfolder = targetroot.joinpath(targetfolder) makedirs(targetfolder, exist_ok=True) filename = targetfolder.joinpath(outfile) logging.info('rendering Entity %s with template "%s" to %s ...', entity.name, templatefilename, filename.relative_to(targetroot)) output = entity.render_template(templatefilename) if output: # Leere Template-Results werden nicht als File geschrieben checksum = hashlib.md5(bytes(output,encoding ='utf-8')).hexdigest() if not self.cdc.get(filename): self.cdc[filename] = dict(changed='new') self.cdc[filename]['current'] = checksum if checksum != self.cdc.get(filename,{}).get('previous'): # Only write file if content has changed with open(filename, "w") as file: file.write(output) if self.cdc[filename]['changed'] != 'new': self.cdc[filename]['changed'] = 'update' else: self.cdc[filename]['changed'] = 'same' if synchtarget: synchtarget = self.config.path.joinpath(synchtarget) self.cdc[filename]['synchto'] = synchtarget else: logging.info('skipping Entity %s with template "%s" because the result is empty', entity.name, templatefilename) def render_model_templates(self): targetroot = self.config.path.joinpath(self.config.paths.output) logging.info('render model templates to: %s', targetroot) entity_count = len(self.config.generator['model']['templates']) with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), BarColumn(), TaskProgressColumn(), TimeElapsedColumn(), ) as progress: task1 = progress.add_task("[blue]Generating Model: ", total=entity_count) for templateconfig in 
self.config.generator['model']['templates']: progress.update(task1, advance=1) targetfolder = templateconfig['targetfolder'] outfile = templateconfig['filename'] synchtarget = templateconfig.get('synchtarget','') #Parse targetfolder and -filename templates: templateconfig['template'] = self.templateEnvironment.from_string(templateconfig['template']).render(model=self) targetfolder = self.templateEnvironment.from_string(targetfolder).render(model=self) outfile = self.templateEnvironment.from_string(outfile).render(model=self) synchtarget = self.templateEnvironment.from_string(synchtarget).render(model=self) targetfolder = targetroot.joinpath(targetfolder) makedirs(targetfolder, exist_ok=True) filename = targetfolder.joinpath(outfile) logging.info('rendering Model to %s ...', filename.relative_to(targetroot)) try: template = self.templateEnvironment.get_template(templateconfig['template']) output = template.render( model=self, templatename=templateconfig['template'] ) if output: # Leere Template-Results werden nicht als File geschrieben checksum = hashlib.md5(bytes(output,encoding ='utf-8')).hexdigest() if not self.cdc.get(filename): self.cdc[filename] = dict(changed='new') self.cdc[filename]['current'] = checksum if checksum != self.cdc.get(filename,{}).get('previous'): # Only write file if content has changed with open(filename, "w") as file: file.write(output) if self.cdc[filename]['changed'] != 'new': self.cdc[filename]['changed'] = 'update' else: self.cdc[filename]['changed'] = 'same' if synchtarget: synchtarget = self.config.path.joinpath(synchtarget) self.cdc[filename]['synchto'] = synchtarget except TemplateNotFound: print("") print(Panel(f"[red]Error while rendering model-templates[/red]: Template {templateconfig['template']} not found.", title="[red]RENDER ERROR", padding=1,title_align="left" )) #print(f"Config: Template {templateconfig['template']} not found.") logging.error(f"Template {templateconfig['template']} not found.") sys.exit(2) except 
    def run_model_hooks(self, type):
        """Run the configured shell hooks of the given kind.

        type = 'pre_hooks', 'post_hooks'
        """
        if self.config.get(type):
            for hookname, hook in self.config.get(type, {}).items():
                print('---------------------------------------------------------------------------------')
                print('hook:', hookname)
                print('---------------------------------------------------------------------------------')
                # NOTE(review): shell=True executes the configured string in a
                # shell -- hooks come from the user's own config, but treat
                # untrusted configs with care
                result = subprocess.run(hook, capture_output=True, text=True, shell=True)
                print(result.stdout)
                print(result.stderr)

    def capture_changes_before(self):
        """Snapshot checksums of all files currently in the output folder.

        Seeds self.cdc so the render passes can detect new/updated/unchanged/
        deleted files afterwards. Assumes the output files are text (opened in
        text mode) -- TODO confirm no binary artifacts land there.
        """
        targetroot = self.config.path.joinpath(self.config.paths.output)
        self.cdc = {}
        for f in sorted(targetroot.glob('**/*.*')):
            filename = f  #.relative_to(targetroot)
            with open(f, 'r') as inputfile:
                checksum = hashlib.md5(inputfile.read().encode('UTF-8')).hexdigest()
            self.cdc[filename] = {
                'current': '',
                'previous': checksum,
                'changed': 'deleted'  # set changed to 'deleted' - when rendering state changes to 'new', 'same', 'update' or stays 'deleted'
            }
        self.cdc_time = time.time()
        #print(self.cdc)

    def capture_changes_after(self):
        """Finalize CDC state after rendering: remove files no longer produced
        and reclassify deleted+new pairs with identical checksums as renames.

        NOTE: the rename matching is order-dependent (last matching 'new' file
        wins when checksums collide) -- kept exactly as is.
        """
        #delete file not in output anymore
        for filename, info in self.cdc.items():
            if info.get('changed') == 'deleted':
                filename.unlink()
        #check for renamed files (compare new and deleted files)
        for filename, info in self.cdc.items():
            if info.get('changed') == 'deleted':
                for f, i in self.cdc.items():
                    if i.get('changed') == 'new' and info.get('previous') == i.get('current'):
                        self.cdc[filename]['changed'] = 'renamed'  # change 'deleted' to 'renamed'
                        self.cdc[filename]['newname'] = f
                        self.cdc[f]['changed'] = 'renametarget'  # change 'new' to 'renametarget'
                        self.cdc[f]['oldname'] = filename

    def display_changes(self):
        """Print a rich table of updated/new/deleted/renamed output files
        (paths shown relative to the current working directory)."""
        table = Table(show_edge=False, box=box.MINIMAL)
        table.add_column("State", justify="right", no_wrap=True)
        table.add_column("File", style="white")
        update = [f for f, i in self.cdc.items() if i.get('changed') == 'update']
        new = [f for f, i in self.cdc.items() if i.get('changed') == 'new']
        deleted = [f for f, i in self.cdc.items() if i.get('changed') == 'deleted']
        renamed = {f: i for f, i in self.cdc.items() if i.get('changed') == 'renamed'}
        for f in update:
            table.add_row("[yellow]updated", str(f.relative_to(os.getcwd())))
        for f in new:
            table.add_row("[green]new", str(f.relative_to(os.getcwd())))
        for f in deleted:
            table.add_row("[red]deleted", str(f.relative_to(os.getcwd())))
        for f, i in renamed.items():
            table.add_row("[blue]renamed", str(f.relative_to(os.getcwd())) + " => " + str(i.get('newname').relative_to(os.getcwd())))
        if table.row_count > 0:
            print(table)
        else:
            print("[grey]No files changed.")
        # for state in ('update','new','deleted','renamed'):
        #     print(state + ': ')
        #     for k, v in self.cdc.items():
        #         if v.get('changed') == state:
        #             if state == 'renamed':
        #                 print("   {} => {}".format(k.relative_to(os.getcwd()), v.get('newname').relative_to(os.getcwd())))
        #             else:
        #                 print("   {}".format(k.relative_to(os.getcwd())))
        #     # print(' ')
table.add_column("File", style="white") update = [f for f, i in self.cdc.items() if i.get('changed') == 'update'] new = [f for f, i in self.cdc.items() if i.get('changed') == 'new'] deleted = [f for f, i in self.cdc.items() if i.get('changed') == 'deleted'] renamed = {f:i for f, i in self.cdc.items() if i.get('changed') == 'renamed'} for f in update: table.add_row("[yellow]updated",str(f.relative_to(os.getcwd()))) for f in new: table.add_row("[green]new",str(f.relative_to(os.getcwd()))) for f in deleted: table.add_row("[red]deleted",str(f.relative_to(os.getcwd()))) for f, i in renamed.items(): table.add_row("[blue]renamed",str(f.relative_to(os.getcwd())) + " => "+ str(i.get('newname').relative_to(os.getcwd())) ) if table.row_count > 0: print(table) else: print("[grey]No files changed.") # for state in ('update','new','deleted','renamed'): # print(state + ': ') # for k, v in self.cdc.items(): # if v.get('changed') == state: # if state == 'renamed': # print(" {} => {}".format(k.relative_to(os.getcwd()), v.get('newname').relative_to(os.getcwd()))) # else: # print(" {}".format(k.relative_to(os.getcwd()))) # # print(' ') def synch(self, fullsynch = False): logging.info('Synching to targets: ...' 
) if fullsynch: items = [(k, v.get('synchto') ) for k, v in self.cdc.items() if v.get('synchto')] else: items = [(k, v.get('synchto') ) for k, v in self.cdc.items() if v.get('synchto') and v.get('changed') in ('update','new','renamed') ] #i = 0 for item in items: #i += 1 #print_progressbar(i, len(items), prefix='Synching to Target: ', suffix='Complete') #TODO: auf rich.progress umstellen logging.info("Copy {} to {}".format(item[0].relative_to(os.getcwd()), item[1] )) makedirs( item[1], exist_ok=True) copy2(item[0], item[1]) def get_parsed_query(self, entity, rawquery): """ Parses Querystrings like: Select * from {entityname1} join {entityname2} and returns a parsed query like Select * from [database].[dbo].[entityname1] join [database].[dbo].[entityname2] """ parsed_result = rawquery for placeholder, queryentity in self.get_query_entities(rawquery).items(): if queryentity: include_db = False if entity.dbentity.database == queryentity.dbentity.database else True parsed_result = parsed_result.replace('{' + str(placeholder) + '}', queryentity.dbentity.get_qualifier(include_db)) return parsed_result def get_query_entities(self, rawquery): """ Parses Querystrings like: Select * from {entityname1} join {entityname2} and returns a list of entity instances. 
""" regex = r"\{(.*?)?\}" entities = {} matches = re.finditer(regex, rawquery, re.MULTILINE) for matchNum, match in enumerate(matches): for groupNum in range(0, len(match.groups())): entities[match.group(1)] = self.get_entity(match.group(1)) return entities def get_entity_name_suggestion(self, entity_type: list, name: str, maxdist: int = 5) -> String: suggest = None dist = maxdist for e in self.entities.values(): if e.type in entity_type: t = self.get_levenshtein_distance(name, e.name) if t < dist: suggest = e.name dist = t if t == 1: return suggest #print(name, e, t) return suggest def get_levenshtein_distance(self, word1, word2) -> int: word2 = word2.lower() word1 = word1.lower() matrix = [[0 for x in range(len(word2) + 1)] for x in range(len(word1) + 1)] for x in range(len(word1) + 1): matrix[x][0] = x for y in range(len(word2) + 1): matrix[0][y] = y for x in range(1, len(word1) + 1): for y in range(1, len(word2) + 1): if word1[x - 1] == word2[y - 1]: matrix[x][y] = min( matrix[x - 1][y] + 1, matrix[x - 1][y - 1], matrix[x][y - 1] + 1 ) else: matrix[x][y] = min( matrix[x - 1][y] + 1, matrix[x - 1][y - 1] + 1, matrix[x][y - 1] + 1 ) return matrix[len(word1)][len(word2)] def create_snapshot(self, filename): logging.info('creating snaphot: %s', filename) with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), BarColumn(), TaskProgressColumn(), TimeElapsedColumn(), ) as progress: entity_count = len(self.entities.keys()) +len(self.interfaces.keys()) + len(self.subdags.keys()) task1 = progress.add_task("[blue]Generating Snapshot: ", total=entity_count) with open(filename, 'w') as file: docs = list() for i, (k,entity) in enumerate(self.entities.items() | self.interfaces.items() | self.subdags.items() ): docs.append(entity._definition) progress.update(task1, advance=1) yaml.dump_all( docs, file, default_flow_style=False, explicit_start=False, sort_keys=False ) def load_snapshot(self, filename): documents = dict() with open(filename, 'r') 
as file: for obj in list( yaml.load_all(file, Loader=yaml.FullLoader) ): documents[obj.get('name')] = obj print(documents)