# Source code for linchemin.rem.graph_distance

import abc
import multiprocessing as mp
from dataclasses import dataclass
from functools import partial
from typing import List, Union

import networkx as nx
import numpy as np
import pandas as pd

import linchemin.cheminfo.functions as cif
from linchemin import settings
from linchemin.cgu.syngraph import BipartiteSynGraph, MonopartiteReacSynGraph
from linchemin.cgu.translate import translator
from linchemin.cheminfo.chemical_similarity import (
    compute_mol_fingerprint,
    compute_reaction_fingerprint,
    compute_similarity,
)
from linchemin.cheminfo.models import ChemicalEquation, Molecule
from linchemin.configuration.defaults import DEFAULT_GED
from linchemin.utilities import console_logger

"""
Module containing classes and functions
to compute the similarity between pairs of routes.
"""

logger = console_logger(__name__)


class GraphDistanceError(Exception):
    """Root of the exception hierarchy for failed graph distance calculations."""


class UnavailableGED(GraphDistanceError):
    """Raised when the requested graph distance method
    is not one of the available ones."""


class MismatchingGraph(GraphDistanceError):
    """Raised when the two input graphs do not have the same type."""


class TooFewRoutes(GraphDistanceError):
    """Raised when a distance matrix is requested for fewer than 2 routes."""


@dataclass
class ChemicalSimilarityParameters:
    """Settings for the chemical similarity calculations used as GED node costs.

    Every default is read from ``settings.GED`` when the class is defined.
    """

    # Fingerprint name forwarded to the reaction fingerprint calculation
    reaction_fingerprint: str = settings.GED.reaction_fp
    # Optional parameters forwarded to the reaction fingerprint calculation
    reaction_fp_params: Union[dict, None] = settings.GED.reaction_fp_params
    # Similarity name used when comparing reaction fingerprints.
    # NOTE(review): unlike the molecular counterpart, this field has no
    # '_name' suffix; code reading the parameters must use 'reaction_similarity'.
    reaction_similarity: str = settings.GED.reaction_similarity_name
    # Fingerprint name forwarded to the molecular fingerprint calculation
    molecular_fingerprint: str = settings.GED.molecular_fp
    # Optional parameters forwarded to the molecular fingerprint calculation
    molecular_fp_params: Union[dict, None] = settings.GED.molecular_fp_params
    # Forwarded as 'count_fp_vector' to the molecular fingerprint calculation
    molecular_fp_count_vect: bool = settings.GED.molecular_fp_count_vect
    # Similarity name used when comparing molecular fingerprints
    molecular_similarity_name: str = settings.GED.molecular_similarity_name


class Ged(metaclass=abc.ABCMeta):
    """Common interface of the graph edit distance (GED) calculators."""

    @abc.abstractmethod
    def compute_ged(
        self,
        syngraph1: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
        syngraph2: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
        ged_params: ChemicalSimilarityParameters,
    ) -> float:
        """
        To calculate the Graph Edit Distance for a pair of graphs.

        Parameters:
        ------------
        syngraph1: Union[MonopartiteReacSynGraph, BipartiteSynGraph]
            The first graph
        syngraph2: Union[MonopartiteReacSynGraph, BipartiteSynGraph]
            The second graph
        ged_params: ChemicalSimilarityParameters
            The parameters to be used in the chemical similarity calculation

        Returns:
        ---------
        ged: float
            The value of the GED
        """
def _resolve_out_data_model(
    syngraph1: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
    syngraph2: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
) -> str:
    """Return the translator data model shared by the two graphs.

    Raises:
    -------
    MismatchingGraph: if the two graphs are not both MonopartiteReacSynGraph
        or both BipartiteSynGraph
    """
    if isinstance(syngraph1, MonopartiteReacSynGraph) and isinstance(
        syngraph2, MonopartiteReacSynGraph
    ):
        return "monopartite_reactions"
    if isinstance(syngraph1, BipartiteSynGraph) and isinstance(
        syngraph2, BipartiteSynGraph
    ):
        return "bipartite"
    logger.error(
        f"Graph1 has type = {type(syngraph1)} "
        f"Graph2 has type = {type(syngraph2)}. "
        f"The GED cannot be computed between graph of different types."
    )
    raise MismatchingGraph


def _to_networkx_pair(syngraph1, syngraph2, out_data_model: str) -> list:
    """Translate the two SynGraph objects into NetworkX graphs, in order."""
    return [
        translator("syngraph", s, "networkx", out_data_model=out_data_model)
        for s in (syngraph1, syngraph2)
    ]


def _build_node_subst_cost(ged_params: ChemicalSimilarityParameters):
    """Bind the chemical similarity parameters to the node substitution cost."""
    return partial(
        node_subst_cost,
        reaction_fingerprints=ged_params.reaction_fingerprint,
        reaction_fp_params=ged_params.reaction_fp_params,
        reaction_similarity_name=ged_params.reaction_similarity,
        molecular_fingerprint=ged_params.molecular_fingerprint,
        molecular_fp_params=ged_params.molecular_fp_params,
        molecular_fp_count_vect=ged_params.molecular_fp_count_vect,
        molecular_similarity_name=ged_params.molecular_similarity_name,
    )


def _find_root(nx_graph):
    """Return the first node of the graph having no outgoing edges."""
    return next(n for n, d in nx_graph.out_degree() if d == 0)


class GedOptNx(Ged):
    """Subclass for the calculation of the optimized GED algorithm
    as implemented in NetworkX."""

    def compute_ged(
        self,
        syngraph1: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
        syngraph2: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
        ged_params: ChemicalSimilarityParameters,
    ) -> float:
        """Takes two SynGraph instances and the parameters for the chemical
        similarity of molecules and reactions, and returns the GED between
        the two graphs as computed by the optimized GED algorithm in NetworkX.

        Raises:
        -------
        MismatchingGraph: if the two graphs are of different types
        """
        out_data_model = _resolve_out_data_model(syngraph1, syngraph2)
        nx_graphs = _to_networkx_pair(syngraph1, syngraph2, out_data_model)

        # The cost function uses the selected reaction
        # and molecular fingerprints and the selected similarity type.
        approximations = nx.optimize_graph_edit_distance(
            nx_graphs[0],
            nx_graphs[1],
            node_subst_cost=_build_node_subst_cost(ged_params),
        )
        # The generator is consumed entirely: the last yielded value is kept.
        min_g = None
        for min_g in approximations:
            pass
        return min_g


class GedNxPrecomputedMatrix(Ged):
    """Subclass for the calculation of the GED algorithm as implemented
    in NetworkX; the chemical similarity between nodes is precomputed."""

    def compute_ged(
        self,
        syngraph1: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
        syngraph2: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
        ged_params: ChemicalSimilarityParameters,
    ) -> float:
        """Takes two SynGraph instances and the parameters for the chemical
        similarity of molecules and reactions, and returns the GED between
        the two graphs as computed by the GED algorithm in NetworkX.
        The similarity matrices between the nodes of the two graphs
        are computed before the GED algorithm is started.

        Raises:
        -------
        MismatchingGraph: if the two graphs are of different types
        """
        out_data_model = _resolve_out_data_model(syngraph1, syngraph2)

        reaction_similarity_matrix = self.precompute_reaction_similarity_matrix(
            syngraph1, syngraph2, ged_params
        )
        # Molecule nodes are only compared for bipartite graphs
        molecule_similarity_matrix = None
        if out_data_model == "bipartite":
            molecule_similarity_matrix = self.precompute_molecule_similarity_matrix(
                syngraph1, syngraph2, ged_params
            )
        node_subst_cost_partial = partial(
            node_subst_cost_matrix,
            reaction_similarity_matrix=reaction_similarity_matrix,
            molecule_similarity_matrix=molecule_similarity_matrix,
        )

        nx_graphs = _to_networkx_pair(syngraph1, syngraph2, out_data_model)
        # Retrieve the roots of the routes
        roots = (_find_root(nx_graphs[0]), _find_root(nx_graphs[1]))
        return nx.graph_edit_distance(
            nx_graphs[0],
            nx_graphs[1],
            node_subst_cost=node_subst_cost_partial,
            roots=roots,
        )

    @staticmethod
    def precompute_reaction_similarity_matrix(
        syngraph1: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
        syngraph2: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
        ged_params: ChemicalSimilarityParameters,
    ) -> pd.DataFrame:
        """To precompute the similarity matrix for ChemicalEquation nodes only"""
        d_reactions1 = get_reactions_fp_dict(
            syngraph1, ged_params.reaction_fingerprint, ged_params.reaction_fp_params
        )
        d_reactions2 = get_reactions_fp_dict(
            syngraph2, ged_params.reaction_fingerprint, ged_params.reaction_fp_params
        )
        return build_similarity_matrix(
            d_reactions1, d_reactions2, ged_params.reaction_similarity
        )

    @staticmethod
    def precompute_molecule_similarity_matrix(
        syngraph1: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
        syngraph2: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
        ged_params: ChemicalSimilarityParameters,
    ) -> pd.DataFrame:
        """To precompute the similarity matrix for Molecule nodes only"""
        d_mol1 = get_mol_fp_dict(
            syngraph1,
            ged_params.molecular_fingerprint,
            ged_params.molecular_fp_params,
            ged_params.molecular_fp_count_vect,
        )
        d_mol2 = get_mol_fp_dict(
            syngraph2,
            ged_params.molecular_fingerprint,
            ged_params.molecular_fp_params,
            ged_params.molecular_fp_count_vect,
        )
        # Molecular fingerprints are compared with the molecular similarity,
        # not with the one selected for reactions.
        return build_similarity_matrix(
            d_mol1, d_mol2, ged_params.molecular_similarity_name
        )


class GedNx(Ged):
    """Subclass for the calculation of the GED algorithm
    as implemented in NetworkX."""

    def compute_ged(
        self,
        syngraph1: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
        syngraph2: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
        ged_params: ChemicalSimilarityParameters,
    ) -> float:
        """Takes two SynGraph instances and the parameters for the chemical
        similarity of molecules and reactions, and returns the GED between
        the two graphs as computed by the GED algorithm in NetworkX.

        Raises:
        -------
        MismatchingGraph: if the two graphs are of different types
        """
        out_data_model = _resolve_out_data_model(syngraph1, syngraph2)
        nx_graphs = _to_networkx_pair(syngraph1, syngraph2, out_data_model)

        # Retrieve the roots of the routes
        roots = (_find_root(nx_graphs[0]), _find_root(nx_graphs[1]))
        # The cost function uses the selected reaction
        # and molecular fingerprints and the selected similarity type.
        return nx.graph_edit_distance(
            nx_graphs[0],
            nx_graphs[1],
            node_subst_cost=_build_node_subst_cost(ged_params),
            roots=roots,
        )
class GedFactory:
    """GED Factory to give access to the GED calculators.

    Attributes:
    -----------
    available_ged: a dictionary
        It maps the string representing the 'name' of a GED algorithm to a
        dictionary holding the corresponding Ged subclass ('value')
        and a short description of the algorithm ('info')
    """

    available_ged = {
        "nx_ged": {
            "value": GedNx,
            "info": 'Standard NetworkX GED algorithm. The "root" argument is used',
        },
        "nx_ged_matrix": {
            "value": GedNxPrecomputedMatrix,
            "info": "Standard NetworkX GED algorithm. "
            "The distance matrix is computed in advance "
            'and the "root" algorithm is used',
        },
        "nx_optimized_ged": {
            "value": GedOptNx,
            "info": "Optimized NetworkX GED algorithm",
        },
    }

    def select_ged(
        self,
        syngraph1,
        syngraph2,
        ged_method,
        ged_params: ChemicalSimilarityParameters,
    ):
        """
        To compute the GED between two graphs with the selected algorithm.

        Parameters:
        ------------
        syngraph1: the first graph
        syngraph2: the second graph
        ged_method: str
            The name of the GED algorithm; it must be a key of 'available_ged'
        ged_params: ChemicalSimilarityParameters
            The parameters for the chemical similarity calculation

        Returns:
        ---------
        The value returned by the 'compute_ged' method of the selected calculator

        Raises:
        -------
        UnavailableGED: if 'ged_method' is not among the available algorithms
        """
        if ged_method not in self.available_ged:
            logger.error(
                f"'{ged_method}' is invalid. "
                f"Available algorithms are: {list(self.available_ged)}"
            )
            raise UnavailableGED

        calculator_class = self.available_ged[ged_method]["value"]
        return calculator_class().compute_ged(syngraph1, syngraph2, ged_params)
def graph_distance_factory(
    syngraph1: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
    syngraph2: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
    ged_method: str,
    ged_params: Union[dict, None] = None,
) -> float:
    """
    To compute the graph edit distance between 2 SynGraph objects

    Parameters:
    -----------
    syngraph1: Union[MonopartiteReacSynGraph, BipartiteSynGraph]
        One of the input graphs
    syngraph2: Union[MonopartiteReacSynGraph, BipartiteSynGraph]
        Another input graph
    ged_method: str
        The graph edit distance algorithm to be used
    ged_params: Union[dict, None]
        It contains the optional parameters for chemical similarity
        calculations, which are:
        (i) reaction_fp: a string corresponding to the type of fingerprints
            to be used for reactions
        (ii) reaction_fp_params: a dictionary with the optional parameters
            for computing reaction fingerprints
        (iii) reaction_similarity_name: a string corresponding to the
            similarity type to be used for reactions
        (iv) molecular_fp: a string corresponding to the type of fingerprints
            to be used for molecules
        (v) molecular_fp_params: a dictionary with the optional parameters
            for computing molecular fingerprints
        (vi) molecular_fp_count_vect: a boolean indicating whether
            'GetCountFingerprint' should be used
        (vii) molecular_similarity_name: a string corresponding to the
            similarity type to be used for molecules
        If it is not provided, the default parameters are used (default None)

    Returns:
    ------
    ged: float
        The ged between the two input graphs

    Example:
    -------
    >>> graphs = json.loads(open('ibm_file.json').read())
    >>> syngraphs = [translator('ibm_retro', g, 'syngraph', out_data_model='bipartite') for g in graphs]
    >>> ged = graph_distance_factory(syngraphs[0], syngraphs[3], ged_method='nx_ged')
    """  # noqa: E501
    similarity_parameters = set_chemical_similarity_parameters(ged_params)
    return GedFactory().select_ged(
        syngraph1, syngraph2, ged_method, similarity_parameters
    )
def set_chemical_similarity_parameters(
    ged_params: Union[dict, None],
) -> ChemicalSimilarityParameters:
    """
    To set the instance of ChemicalSimilarityParameters with the desired parameters

    Parameters:
    ------------
    ged_params: Union[dict, None]
        The user-selected parameters; recognized keys are 'reaction_fp',
        'reaction_fp_params', 'reaction_similarity_name', 'molecular_fp',
        'molecular_fp_params', 'molecular_fp_count_vect'
        and 'molecular_similarity_name'. Missing keys fall back
        to the values in settings.GED. If None, all defaults are used.

    Returns:
    ---------
    params: ChemicalSimilarityParameters
        The parameters for the chemical similarity calculation
    """
    if ged_params is None:
        return ChemicalSimilarityParameters()

    return ChemicalSimilarityParameters(
        reaction_fingerprint=ged_params.get("reaction_fp", settings.GED.reaction_fp),
        reaction_fp_params=ged_params.get(
            "reaction_fp_params", settings.GED.reaction_fp_params
        ),
        # The dataclass field is 'reaction_similarity' (no '_name' suffix):
        # the value must land there to be used by the GED calculators.
        reaction_similarity=ged_params.get(
            "reaction_similarity_name", settings.GED.reaction_similarity_name
        ),
        molecular_fingerprint=ged_params.get(
            "molecular_fp", settings.GED.molecular_fp
        ),
        molecular_fp_params=ged_params.get(
            "molecular_fp_params", settings.GED.molecular_fp_params
        ),
        molecular_fp_count_vect=ged_params.get(
            "molecular_fp_count_vect", settings.GED.molecular_fp_count_vect
        ),
        molecular_similarity_name=ged_params.get(
            "molecular_similarity_name", settings.GED.molecular_similarity_name
        ),
    )


# COST FUNCTIONS
def node_subst_cost_matrix(
    node1,
    node2,
    reaction_similarity_matrix: pd.DataFrame,
    molecule_similarity_matrix: Union[pd.DataFrame, None],
) -> float:
    """
    To compute the cost of substituting one node with another, based on
    the pre-computed similarity matrices.
    The more different the nodes, the higher the cost.

    Parameters:
    ------------
    node1, node2:
        The attribute dictionaries of the two nodes; the node object is read
        from node["properties"]["node_type"]
    reaction_similarity_matrix: pd.DataFrame
        The similarities between ChemicalEquation nodes, indexed by the uid
        of the second graph's nodes (rows) and of the first graph's nodes (columns)
    molecule_similarity_matrix: Union[pd.DataFrame, None]
        The similarities between Molecule nodes, with the same layout;
        it is only accessed when both nodes are Molecule instances

    Returns:
    ---------
    cost: float
        1 - similarity if the nodes have the same type, 1.0 otherwise
    """
    item1 = node1["properties"]["node_type"]
    item2 = node2["properties"]["node_type"]

    # The correct similarity matrix is used based on the node types
    if isinstance(item1, ChemicalEquation) and isinstance(item2, ChemicalEquation):
        return 1.0 - reaction_similarity_matrix.loc[item2.uid, item1.uid]

    if isinstance(item1, Molecule) and isinstance(item2, Molecule):
        return 1.0 - molecule_similarity_matrix.loc[item2.uid, item1.uid]

    # Nodes of different types: the maximum cost is returned
    return 1.0


def node_subst_cost(
    node1,
    node2,
    reaction_fingerprints,
    reaction_fp_params,
    reaction_similarity_name,
    molecular_fingerprint,
    molecular_fp_params,
    molecular_fp_count_vect,
    molecular_similarity_name,
) -> float:
    """
    To compute the cost of substituting one node with another, based on
    the selected fingerprints/similarity.
    The more different the nodes, the higher the cost.

    Parameters:
    ------------
    node1, node2:
        The attribute dictionaries of the two nodes; the node object is read
        from node["properties"]["node_type"]
    reaction_fingerprints, reaction_fp_params, reaction_similarity_name:
        The settings used when both nodes are ChemicalEquation instances
    molecular_fingerprint, molecular_fp_params, molecular_fp_count_vect,
    molecular_similarity_name:
        The settings used when both nodes are Molecule instances

    Returns:
    ---------
    cost: float
        The cost of the substitution (between 0 and 1)
    """
    item1 = node1["properties"]["node_type"]
    item2 = node2["properties"]["node_type"]

    # If both nodes are ChemicalEquation, their dissimilarity is the cost
    if isinstance(item1, ChemicalEquation) and isinstance(item2, ChemicalEquation):
        return get_reaction_similarity(
            item1.rdrxn,
            item2.rdrxn,
            reaction_fingerprints,
            reaction_fp_params,
            reaction_similarity_name,
        )

    # If both nodes are Molecule, their dissimilarity is the cost
    if isinstance(item1, Molecule) and isinstance(item2, Molecule):
        return get_molecular_similarity(
            item1.rdmol,
            item2.rdmol,
            molecular_fingerprint,
            molecular_fp_params,
            molecular_similarity_name,
            molecular_fp_count_vect,
        )

    # If the two nodes are of different types, the maximum diversity is returned
    return 1.0


def get_reaction_similarity(
    rdrxn1: cif.rdChemReactions,
    rdrxn2: cif.rdChemReactions,
    reaction_fingerprint,
    reaction_fp_params,
    reaction_similarity,
) -> float:
    """
    To compute the dissimilarity between two reactions.

    Despite the function name, the returned value is 1 - similarity,
    so that it can be used directly as a substitution cost.
    """
    fp1 = compute_reaction_fingerprint(
        rdrxn1,
        fp_name=reaction_fingerprint,
        params=reaction_fp_params,
    )
    fp2 = compute_reaction_fingerprint(
        rdrxn2,
        fp_name=reaction_fingerprint,
        params=reaction_fp_params,
    )
    similarity = compute_similarity(fp1, fp2, similarity_name=reaction_similarity)
    return 1.0 - similarity


def get_molecular_similarity(
    rdmol1: cif.rdChemReactions,
    rdmol2: cif.rdChemReactions,
    molecular_fingerprint,
    molecular_fp_params,
    molecular_similarity_name,
    molecular_fp_count_vect,
) -> float:
    """
    To compute the dissimilarity between two molecules.

    Despite the function name, the returned value is 1 - similarity,
    so that it can be used directly as a substitution cost.
    """
    # NOTE(review): the rdmol annotations reuse the reaction type;
    # presumably a molecule type is meant - confirm against the cheminfo module.
    fp1 = compute_mol_fingerprint(
        rdmol1,
        fp_name=molecular_fingerprint,
        parameters=molecular_fp_params,
        count_fp_vector=molecular_fp_count_vect,
    )
    fp2 = compute_mol_fingerprint(
        rdmol2,
        fp_name=molecular_fingerprint,
        parameters=molecular_fp_params,
        count_fp_vector=molecular_fp_count_vect,
    )
    similarity = compute_similarity(fp1, fp2, similarity_name=molecular_similarity_name)
    return 1.0 - similarity


def get_mol_fp_dict(
    syngraph: BipartiteSynGraph,
    molecular_fingerprint: str,
    molecular_fp_params=DEFAULT_GED["molecular_fp_params"]["value"],
    molecular_fp_count_vect=DEFAULT_GED["molecular_fp_count_vect"]["value"],
) -> dict:
    """
    To build a dictionary, whose keys are the uid of the Molecule nodes
    in a SynGraph and the values their fingerprints.

    Parameters:
    ------------
    syngraph: BipartiteSynGraph
        The graph object for whose nodes fingerprints should be computed
    molecular_fingerprint: str
        The selected type of molecular fingerprint
    molecular_fp_params: dict
        The optional parameters for computing molecular fingerprints
    molecular_fp_count_vect: bool
        Whether 'GetCountFingerprint' should be used

    Returns:
    ----------
    molecule_node_fingerprints: dict
        The fingerprints of the Molecule nodes
    """
    return {
        mol.uid: compute_mol_fingerprint(
            mol.rdmol,
            molecular_fingerprint,
            molecular_fp_params,
            molecular_fp_count_vect,
        )
        for mol in get_molecule_nodes(syngraph)
    }


def get_reactions_fp_dict(
    syngraph: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
    reaction_fingerprints: str,
    reaction_fp_params=DEFAULT_GED["reaction_fp_params"]["value"],
) -> dict:
    """
    To build a dictionary, whose keys are the uid of the ChemicalEquation
    nodes in a SynGraph and the values their fingerprints.

    Parameters:
    ------------
    syngraph: Union[MonopartiteReacSynGraph, BipartiteSynGraph]
        The graph object for whose nodes fingerprints should be computed
    reaction_fingerprints: str
        The selected type of reaction fingerprint
    reaction_fp_params: dict
        The optional parameters for computing reaction fingerprints

    Returns:
    ----------
    reaction_node_fingerprints: dict
        The fingerprints of the ChemicalEquation nodes
    """
    return {
        ce.uid: compute_reaction_fingerprint(
            ce.rdrxn, reaction_fingerprints, reaction_fp_params
        )
        for ce in get_chemical_equation_nodes(syngraph)
    }


def get_chemical_equation_nodes(
    syngraph: Union[MonopartiteReacSynGraph, BipartiteSynGraph]
) -> set:
    """To extract all ChemicalEquation nodes from a SynGraph"""
    reactions = set()
    # Both the keys (parents) and the values (children) of the graph are scanned
    for parent, children in syngraph.graph.items():
        if isinstance(parent, ChemicalEquation):
            reactions.add(parent)
        reactions.update(
            child for child in children if isinstance(child, ChemicalEquation)
        )
    return reactions


def get_molecule_nodes(syngraph: BipartiteSynGraph) -> set:
    """To extract all Molecule nodes from a SynGraph"""
    molecules = set()
    # Both the keys (parents) and the values (children) of the graph are scanned
    for parent, children in syngraph.graph.items():
        if isinstance(parent, Molecule):
            molecules.add(parent)
        molecules.update(child for child in children if isinstance(child, Molecule))
    return molecules


def build_similarity_matrix(
    d_fingerprints1: dict,
    d_fingerprints2: dict,
    similarity_name: str = settings.GED.molecular_similarity_name,
) -> pd.DataFrame:
    """
    To build the similarity matrix between two routes with the selected method.

    Parameters:
    ------------
    d_fingerprints1: dict
        The fingerprints of the first graph to be considered
        in the form {hash: fingerprints}
    d_fingerprints2: dict
        The fingerprints of the second graph to be considered
        in the form {hash: fingerprints}
    similarity_name: str
        The similarity method to be used

    Returns:
    ---------
    matrix: pd.DataFrame
        a pandas dataframe (n nodes in graph2) x (n nodes in graph1)
        containing the similarity values: the rows are labelled with the keys
        of d_fingerprints2 and the columns with the keys of d_fingerprints1
    """
    columns = list(d_fingerprints1)
    rows = list(d_fingerprints2)
    matrix = pd.DataFrame(
        np.zeros((len(rows), len(columns))), columns=columns, index=rows
    )
    # Dictionary keys are unique, hence each cell is visited exactly once
    for h1, fp1 in d_fingerprints1.items():
        for h2, fp2 in d_fingerprints2.items():
            matrix.loc[h2, h1] = compute_similarity(
                fp1, fp2, similarity_name=similarity_name
            )
    return matrix
def compute_distance_matrix(
    syngraphs: List[Union[MonopartiteReacSynGraph, BipartiteSynGraph]],
    ged_method: str,
    ged_params: Union[dict, None] = None,
    parallelization: bool = False,
    n_cpu=settings.GED.n_cpu,
) -> pd.DataFrame:
    """
    To compute the distance matrix of a set of routes.

    Parameters:
    -----------
    syngraphs: List[Union[MonopartiteReacSynGraph, BipartiteSynGraph]]
        The routes for which the distance matrix must be computed
    ged_method: str
        The graph edit distance method to be used
    ged_params: Optional[Union[dict, None]]
        The dictionary containing the parameters for fingerprints and
        similarity calculations; if it is not provided, the default values
        are used (default None)
    parallelization: Optional[bool]
        Whether parallelization should be used (default False)
    n_cpu: Optional[int]
        If parallelization is activated, it indicates the number of CPUs
        to be used (default: the value of settings.GED.n_cpu)

    Returns:
    --------
    matrix: a pandas DataFrame
        The distance matrix, with dimensions (n routes x n routes),
        with the graph distances

    Raises:
    -------
    TooFewRoutes: if fewer than 2 routes are passed

    Example:
    --------
    >>> graph = json.loads(open('az_file.json').read())
    >>> mp_syngraphs = [translator('az_retro', g, 'syngraph', out_data_model='monopartite_reactions') for g in graph]
    >>> m = compute_distance_matrix(mp_syngraphs, ged_method='nx_ged')
    """  # noqa: E501
    n_routes = len(syngraphs)
    if n_routes < 2:
        logger.error(
            "Less than 2 routes were found: "
            "it is not possible to compute the distance matrix"
        )
        raise TooFewRoutes

    # Calculation with parallelization
    if parallelization:
        return setup_parallel_calculation(syngraphs, n_cpu, ged_method, ged_params)

    # Serial calculation: only the upper triangle (diagonal included)
    # is computed and each value is mirrored.
    indices = range(n_routes)
    matrix = pd.DataFrame(columns=indices, index=indices)
    for i, route_i in enumerate(syngraphs):
        for j in range(i, n_routes):
            distance = graph_distance_factory(
                route_i,
                syngraphs[j],
                ged_method=ged_method,
                ged_params=ged_params,
            )
            matrix.loc[j, i] = distance
            matrix.loc[i, j] = distance
    return matrix
def setup_parallel_calculation(
    syngraphs: list, n_cpu: int, ged_method: str, ged_params: dict
) -> pd.DataFrame:
    """
    To setup the matrix calculation with parallelization

    Parameters:
    -----------
    syngraphs: list
        The routes for which the distance matrix must be computed
    n_cpu: int
        The number of worker processes of the pool
    ged_method: str
        The graph edit distance method to be used
    ged_params: dict
        The optional parameters for the ged calculation

    Returns:
    --------
    matrix: pd.DataFrame
        The symmetric distance matrix (n routes x n routes)
    """
    n_routes = len(syngraphs)
    routes_range = range(n_routes)
    results = []
    # The context manager releases the worker processes even when
    # one of the calculations raises; 'starmap' blocks until its results
    # are available, so no pending work is lost when the block is left.
    with mp.Pool(n_cpu) as pool:
        for i in routes_range:
            # Only the upper triangle (diagonal included) is computed
            tasks = [
                ((i, j, syngraphs[i], syngraphs[j]), ged_method, ged_params)
                for j in range(i, n_routes)
            ]
            results.append(pool.starmap(parallel_matrix_calculations, tasks))
    return build_ged_matrix(results, routes_range)


def parallel_matrix_calculations(
    data: tuple, ged_method: str, ged_params: dict
) -> tuple:
    """
    To compute the distance matrix elements in a fashion suitable
    for parallel computation.

    Parameters:
    -----------
    data: tuple
        It contains the two indices and the two routes for computing
        an element of the distance matrix (i, j, route1, route2)
    ged_method: str
        The graph edit distance method to be used
    ged_params: dict
        The optional parameters for the ged calculation

    Returns:
    --------
    i, j, sim: tuple
        Two indices of the matrix and the relative distance value
    """
    i, j, route1, route2 = data
    distance = graph_distance_factory(
        route1, route2, ged_method=ged_method, ged_params=ged_params
    )
    return i, j, distance


def build_ged_matrix(results: list, routes_range: range) -> pd.DataFrame:
    """
    To build the distance matrix from a list of lists

    Parameters:
    -----------
    results: list
        A list of lists of (i, j, distance) tuples
    routes_range: range
        The labels used for both the rows and the columns of the matrix

    Returns:
    --------
    matrix: pd.DataFrame
        The matrix in which each distance is written in both
        the (i, j) and the (j, i) cells
    """
    matrix = pd.DataFrame(columns=routes_range, index=routes_range)
    for row_results in results:
        for i, j, distance in row_results:
            matrix.loc[j, i] = distance
            matrix.loc[i, j] = distance
    return matrix


# Helper functions
def get_available_ged_algorithms() -> dict:
    """Returns a dictionary with the available GED algorithms and some info"""
    return {
        name: details["info"] for name, details in GedFactory.available_ged.items()
    }


def get_ged_default_parameters() -> dict:
    """Returns a dictionary mapping each entry of DEFAULT_GED to its 'info' field"""
    return {name: details["info"] for name, details in DEFAULT_GED.items()}


def get_ged_parameters() -> dict:
    """Returns a dictionary mapping each entry of DEFAULT_GED
    to its 'general_info' field"""
    return {name: details["general_info"] for name, details in DEFAULT_GED.items()}