import copy
from abc import ABC, abstractmethod
from typing import List, Union
import linchemin.utilities as utilities
from linchemin.cgu.syngraph import (
BipartiteSynGraph,
MonopartiteMolSynGraph,
MonopartiteReacSynGraph,
SynGraph,
)
from linchemin.cheminfo.constructors import ChemicalEquationConstructor
from linchemin.cheminfo.models import ChemicalEquation, Molecule
"""Module containing functions and classes to perform operations on SynGraph instances"""
logger = utilities.console_logger(__name__)
class SmilesTypeError(TypeError):
"""Error raised if the provided smiles string is of the wrong type"""
pass
class GraphTypeError(TypeError):
"""Error raised if the provided graph is not a SynGraph"""
pass
[docs]
def merge_syngraph(
list_syngraph: List[SynGraph],
) -> Union[MonopartiteReacSynGraph, BipartiteSynGraph, MonopartiteMolSynGraph]:
"""
To merge a list od SynGraph objects in a single graph instance.
Parameters:
-----------
list_syngraph: SynGraph
The list of the input SynGraph objects to be merged
Returns:
---------
merged: Union[MonopartiteReacSynGraph, BipartiteSynGraph, MonopartiteMolSynGraph]
The new SynGraph object resulting from the merging of the input graphs; the SynGraph type is the same as the
input graphs
Raises:
---------
TypeError: if the input list contains non SynGraph objects
Example:
--------
>>> graph_ibm = json.loads(open('ibm_file.json').read())
>>> all_routes_ibm = [translator('ibm_retro', g, 'syngraph', out_data_model='bipartite') for g in graph_ibm]
>>> merged_graph = merge_syngraph(all_routes_ibm)
"""
if all(isinstance(x, MonopartiteReacSynGraph) for x in list_syngraph):
merged = MonopartiteReacSynGraph()
elif all(isinstance(x, BipartiteSynGraph) for x in list_syngraph):
merged = BipartiteSynGraph()
elif all(isinstance(x, MonopartiteMolSynGraph) for x in list_syngraph):
merged = MonopartiteMolSynGraph()
else:
raise TypeError(
"Invalid type. Only SynGraph objects can be merged. All routes must "
"be in the same data model"
)
merged.source = "tree"
for syngraph in list_syngraph:
[merged.add_node(step) for step in syngraph]
return merged
[docs]
def add_reaction_to_syngraph(
syngraph: SynGraph,
reaction_to_add: str,
) -> Union[BipartiteSynGraph, MonopartiteMolSynGraph, MonopartiteReacSynGraph]:
"""
To add a chemical reaction to a SynGraph object
Parameters:
-------------
syngraph: Union[MonopartiteReacSynGraph, BipartiteSynGraph, MonopartiteMolSynGraph]
The SynGraph object to be modified
reaction_to_add: str
The smiles of the reaction to be added to the graph
Returns:
----------
new_graph: Union[MonopartiteReacSynGraph, BipartiteSynGraph, MonopartiteMolSynGraph]
The SynGraph object with the addition of the new node and of the same type as the input graph
Raises:
-------
TypeError: if the input graph is not in SynGraph format
SmilesTypeError: if the provided smiles is not a valid reaction smiles
"""
if not isinstance(
syngraph, (BipartiteSynGraph, MonopartiteMolSynGraph, MonopartiteReacSynGraph)
):
logger.error("Only Syngraph objects are supported")
raise GraphTypeError
if reaction_to_add.count(">") != 2:
logger.error("Please insert a valid reaction smiles")
raise SmilesTypeError
all_reactions = [
{"query_id": 0, "output_string": reaction_to_add}
] + build_list_of_reactions(syngraph)
return type(syngraph)(all_reactions)
[docs]
def remove_reaction_from_syngraph(
syngraph: Union[BipartiteSynGraph, MonopartiteMolSynGraph, MonopartiteReacSynGraph],
reaction_to_remove: str,
remove_dandling_nodes: bool = True,
) -> Union[BipartiteSynGraph, MonopartiteMolSynGraph, MonopartiteReacSynGraph]:
"""
To remove a reaction (and optionally the possible dangling nodes) from a SynGraph object based on the reaction smiles
Parameters:
-----------
syngraph: Union[MonopartiteReacSynGraph, BipartiteSynGraph, MonopartiteMolSynGraph]
The SynGraph object from which the nodes should be removed
reaction_to_remove: str
The smiles of the reaction to be removed
remove_dandling_nodes: bool
Wether the possible dandling nodes should be removed
Returns:
---------
new_graph: Union[MonopartiteReacSynGraph, BipartiteSynGraph, MonopartiteMolSynGraph]
A new SynGraph object from which the selected node and all its "parent" nodes are removed
Raises:
---------
TypeError: if the input object is not a SynGraph
SmilesTypeError: if the provided smiles is not a valid reaction smiles
KeyError: if the reaction to be removed is not present in the input SynGraph
Example
-------
>>> new_graph = remove_reaction_from_syngraph(syngraph, 'CCN.O>>CCO')
"""
if is_syngraph(syngraph):
if reaction_to_remove.count(">") == 2:
node = ChemicalEquationConstructor().build_from_reaction_string(
reaction_to_remove, "smiles"
)
else:
logger.error("Please insert a valid reaction smiles")
raise SmilesTypeError
if syngraph.graph.get(node, None) is None:
logger.warning(
"The selected node is not present in the input graph. The input graph is returned unchanged"
)
return syngraph
new_graph = copy.deepcopy(syngraph)
if remove_dandling_nodes:
return handle_dangling_nodes(new_graph, node)
else:
new_graph.remove_node(node.uid)
return new_graph
def handle_dangling_nodes(
graph: Union[MonopartiteReacSynGraph, BipartiteSynGraph, MonopartiteMolSynGraph],
node: ChemicalEquation,
) -> Union[MonopartiteReacSynGraph, BipartiteSynGraph, MonopartiteMolSynGraph]:
"""To remove the possible dangling nodes due to the removal of the selected node"""
nodes_to_remove = set()
for leaf in graph.get_leaves():
if path := find_path(graph, leaf, node):
[nodes_to_remove.add(n) for n in path]
for n in nodes_to_remove:
graph.remove_node(n.uid)
return graph
# Factory to extract reaction strings from a syngraph object
class ReactionsExtractor(ABC):
"""Abstract class for extracting a list of dictionary of reaction strings from a SynGraph object"""
@abstractmethod
def extract(
self,
syngraph: Union[
MonopartiteReacSynGraph, BipartiteSynGraph, MonopartiteMolSynGraph
],
) -> list:
pass
class ReactionsExtractorFromBipartite(ReactionsExtractor):
"""ReactionsExtractor subclass to handle BipartiteSynGraph objects"""
def extract(self, syngraph: BipartiteSynGraph) -> list:
unique_reactions = [
node
for node in syngraph.get_unique_nodes()
if isinstance(node, ChemicalEquation)
]
return [
{"query_id": n, "input_string": reaction.smiles}
for n, reaction in enumerate(unique_reactions)
]
class ReactionsExtractorFromMonopartiteReaction(ReactionsExtractor):
"""ReactionsExtractor subclass to handle MonopartiteReacSynGraph objects"""
def extract(self, syngraph: MonopartiteReacSynGraph) -> list:
unique_reactions = syngraph.get_unique_nodes()
return [
{"query_id": n, "input_string": reaction.smiles}
for n, reaction in enumerate(unique_reactions)
]
class ReactionsExtractorFromMonopartiteMolecules(ReactionsExtractor):
"""ReactionsExtractor subclass to handle MonopartiteMolSynGraph objects"""
# TODO: check behavior when the same reactant appears twice (should be solved once the stoichiometry attribute is developed)
def extract(self, syngraph: MonopartiteMolSynGraph) -> list:
unique_reactions = self.identify_reactions(syngraph)
return [
{"query_id": n, "input_string": reaction.smiles}
for n, reaction in enumerate(unique_reactions)
]
def identify_reactions(self, syngraph: MonopartiteMolSynGraph) -> set:
unique_reactions = set()
chemical_equation_constructor = ChemicalEquationConstructor(
molecular_identity_property_name="smiles"
)
for parent, children in syngraph:
for child in children:
reactants = [
r.smiles for r, products_set in syngraph if child in products_set
]
reaction_string = ">".join(
[".".join(reactants), ".".join([]), ".".join([child.smiles])]
)
chemical_equation = (
chemical_equation_constructor.build_from_reaction_string(
reaction_string=reaction_string, inp_fmt="smiles"
)
)
unique_reactions.add(chemical_equation)
return unique_reactions
class ExtractorFactory:
syngraph_types = {
BipartiteSynGraph: ReactionsExtractorFromBipartite,
MonopartiteReacSynGraph: ReactionsExtractorFromMonopartiteReaction,
MonopartiteMolSynGraph: ReactionsExtractorFromMonopartiteMolecules,
}
def extract_reactions(self, syngraph):
if type(syngraph) not in self.syngraph_types:
raise GraphTypeError(
"Invalid graph type. Available graph objects are:",
list(self.syngraph_types.keys()),
)
extractor = self.syngraph_types[type(syngraph)]
return extractor().extract(syngraph)
def find_path(
route: Union[MonopartiteReacSynGraph, BipartiteSynGraph],
leaf: Union[Molecule, ChemicalEquation],
root: Union[Molecule, ChemicalEquation],
path: Union[list, None] = None,
) -> list:
"""
To find a path between two nodes in a SynGraph.
Parameters:
------------
graph: Union[MonopartiteReacSynGraph, BipartiteSynGraph]
The graph of interest
leaf: Union[Molecule, ChemicalEquation]
The node at which the path should end
root: Union[Molecule, ChemicalEquation]
The node at which the path should start
path: Optional[Union[list, None]]
The list of Molecule/ChemicalEquation instances already discovered along the path (default None)
Returns:
--------
path: list
The path as list of Molecule and/or ChemicalEquation
Example:
---------
>>> path = find_path(syngraph, leaf_mol, root_mol)
"""
if path is None:
path = []
if leaf == root:
return path
path += [leaf]
if leaf == root:
return path
for node in route.graph[leaf]:
if node not in path:
if newpath := find_path(route, node, root, path):
return newpath
def find_all_paths(
syngraph: Union[MonopartiteReacSynGraph, BipartiteSynGraph, MonopartiteMolSynGraph],
) -> List[list]:
"""
To identify all linear paths in a syngraph
Parameters:
-----------
syngraph: Union[MonopartiteReacSynGraph, BipartiteSynGraph, MonopartiteMolSynGraph]
The syngraph object for which all the linear paths should be searched
Returns:
--------
all_paths: List[list]
The list of linear paths identified
Raises:
--------
TypeError: if the input graph is not a syngraph object
"""
if is_syngraph(syngraph):
root = syngraph.get_roots()
leaves = syngraph.get_leaves()
all_paths = []
for leaf in leaves:
path = find_path(syngraph, leaf, root[0])
reaction_path = [
step for step in path if isinstance(step, ChemicalEquation)
]
if reaction_path not in all_paths:
all_paths.append(reaction_path)
return all_paths
def build_list_of_reactions(syngraph):
"""To extract a list of reaction smiles from a syngraph and return it suitable to be used as input for syngraph
building"""
reactions = extract_reactions_from_syngraph(syngraph)
return [
{"query_id": d["query_id"], "output_string": d["input_string"]}
for d in reactions
]
def is_syngraph(graph) -> bool:
if not isinstance(
graph, (MonopartiteMolSynGraph, BipartiteSynGraph, MonopartiteReacSynGraph)
):
logger.error(
f"{type(graph)} is not supported. Only Syngraph objects are accepted."
)
raise GraphTypeError
return True