Source code for linchemin.cheminfo.constructors

from typing import Union

import linchemin.cheminfo.functions as cif
from linchemin import settings, utilities
from linchemin.cheminfo.chemical_hashes import (
    calculate_molecular_hash_map,
    calculate_reaction_like_hash_map,
    validate_molecular_identifier,
    validate_reaction_identifier,
)
from linchemin.cheminfo.model_constructors.disconnection_constructor import (
    create_disconnection,
)
from linchemin.cheminfo.model_constructors.ratam_constructor import create_ratam
from linchemin.cheminfo.models import ChemicalEquation, Molecule, ReactionComponents

logger = utilities.console_logger(__name__)


class UnparsableReaction(Exception):
    """To be raised if the input reaction string cannot be parsed"""


class UnparsableMolecule(Exception):
    """To be raised if the input molecule string cannot be parsed"""


class InvalidMoleculeInput(Exception):
    """To be raised if the input molecule is not an rdkit Mol object"""


# Molecule Constructor
[docs] class MoleculeConstructor: """ Class implementing the constructor of the Molecule class Attributes: ------------ molecular_identity_property_name: a string indicating which kind of input string determines the identity of the object (e.g. 'smiles') hash_list: a list containing the additional hash values to be computed """
[docs] def __init__( self, molecular_identity_property_name: str = settings.CONSTRUCTORS.molecular_identity_property_name, hash_list: list = settings.CONSTRUCTORS.molecular_hash_list, ): validate_molecular_identifier( molecular_identifier=molecular_identity_property_name ) self.molecular_identity_property_name = molecular_identity_property_name self.hash_list = set(hash_list + [self.molecular_identity_property_name])
def build_from_molecule_string( self, molecule_string: str, inp_fmt: str ) -> Molecule: """ Build a Molecule instance from a string representation. Args: molecule_string (str): String representation of the molecule inp_fmt (str): Input format of the molecule string Returns: Molecule: Constructed Molecule instance Raises: UnparsableMolecule: If the molecule string cannot be parsed """ try: rdmol_input = cif.rdmol_from_string( input_string=molecule_string, inp_fmt=inp_fmt ) return self.build_from_rdmol(rdmol_input) except ValueError as e: logger.error(f"Invalid input format: {inp_fmt}. Error: {e}") raise UnparsableMolecule(f"Invalid input format: {inp_fmt}") except Exception as e: logger.error( f"Failed to parse molecule string: {molecule_string}. Error: {e}" ) raise UnparsableMolecule(f"Failed to parse molecule string: {str(e)}") def build_from_rdmol(self, rdmol: cif.Mol) -> Molecule: """ Build a Molecule instance from a rdkit Mol instance. Args: rdmol (Chem.Mol): RDKit Mol object Returns: Molecule: Constructed Molecule instance Raises: InvalidMoleculeInput: If the input RDKit Mol object is invalid """ if not isinstance(rdmol, cif.Mol): raise InvalidMoleculeInput("Input must be a valid RDKit Mol object") rdmol_mapped = rdmol rdmol_unmapped = cif.remove_rdmol_atom_mapping(rdmol=rdmol_mapped) rdmol_unmapped_canonical = cif.new_molecule_canonicalization(rdmol_unmapped) rdmol_mapped_canonical = cif.new_molecule_canonicalization(rdmol_mapped) hash_map = calculate_molecular_hash_map( rdmol=rdmol_unmapped_canonical, hash_list=self.hash_list, ) identity_property = hash_map.get(self.molecular_identity_property_name) uid = utilities.create_hash(identity_property) smiles = cif.compute_mol_smiles(rdmol=rdmol_unmapped_canonical) return Molecule( rdmol=rdmol_unmapped_canonical, rdmol_mapped=rdmol_mapped_canonical, molecular_identity_property_name=self.molecular_identity_property_name, hash_map=hash_map, smiles=smiles, uid=uid, identity_property=identity_property, )
[docs] class ChemicalEquationConstructor: """ Class implementing the constructor of the ChemicalEquation class Attributes: --------------- molecular_identity_property_name: a string indicating the property determining the identity of the molecules in the chemical equation (e.g. 'smiles') chemical_equation_identity_name: a string indicating the components of the chemical equation participating in the definition of its uid (e.g., 'r_p' to include only reactants and products; 'r_r_p' to include also reagents """ desired_product: Molecule molecule_constructor: MoleculeConstructor original_reaction_components: ReactionComponents chemical_equation: ChemicalEquation
[docs] def __init__( self, molecular_identity_property_name: str = settings.CONSTRUCTORS.molecular_identity_property_name, chemical_equation_identity_name: str = settings.CONSTRUCTORS.chemical_equation_identity_name, ) -> None: validate_molecular_identifier( molecular_identifier=molecular_identity_property_name ) self.molecular_identity_property_name = molecular_identity_property_name validate_reaction_identifier( reaction_identifier=chemical_equation_identity_name ) self.chemical_equation_identity_name = chemical_equation_identity_name self.molecule_constructor = MoleculeConstructor( self.molecular_identity_property_name )
def build_from_rdrxn( self, rdrxn: cif.rdChemReactions.ChemicalReaction, desired_product: Union[cif.Mol, None] = settings.CONSTRUCTORS.desired_product, ) -> ChemicalEquation: """To build a ChemicalEquation instance from a rdkit ChemicalReaction object""" components = cif.rdrxn_to_molecule_catalog(rdrxn, self.molecule_constructor) self.original_reaction_components = ReactionComponents.from_dict(components) if not desired_product: self.desired_product = cif.get_heaviest_mol( self.original_reaction_components.products ) else: self.desired_product = self.molecule_constructor.build_from_rdmol( desired_product ) return self._unpack_rdrxn() def build_from_reaction_string( self, reaction_string: str, inp_fmt: str, desired_product: Union[str, None] = settings.CONSTRUCTORS.desired_product, ) -> ChemicalEquation: """To build a ChemicalEquation instance from a reaction string""" try: rdrxn = cif.rdrxn_from_string(input_string=reaction_string, inp_fmt=inp_fmt) except Exception as e: logger.error(f"String {reaction_string} could not be parsed. Error: {e}") raise UnparsableReaction if desired_product: desired_product = cif.rdmol_from_string(desired_product, inp_fmt=inp_fmt) return self.build_from_rdrxn(rdrxn=rdrxn, desired_product=desired_product) def _unpack_rdrxn(self) -> ChemicalEquation: if cif.is_mapped_molecule(self.desired_product.rdmol_mapped): return self._build_from_mapped_reaction() return self._build_from_unmapped_reaction() def _build_from_unmapped_reaction(self) -> ChemicalEquation: self.chemical_equation = ChemicalEquation() self.chemical_equation.catalog = ( self.original_reaction_components.get_molecule_catalog() ) self.chemical_equation.role_map = ( self.original_reaction_components.get_role_map() ) self.chemical_equation.stoichiometry_coefficients = ( self.original_reaction_components.get_stoichiometry_coefficients() ) self._populate_ce() self.chemical_equation.mapping = None self.chemical_equation.disconnection = None return self.chemical_equation def _build_from_mapped_reaction(self) -> ChemicalEquation: self.chemical_equation = ChemicalEquation() self.chemical_equation.catalog = ( self.original_reaction_components.get_molecule_catalog() ) self.chemical_equation.mapping = create_ratam( self.original_reaction_components, self.desired_product ) self.chemical_equation.role_map = self.chemical_equation.mapping.get_role_map() self.chemical_equation.stoichiometry_coefficients = ( self.chemical_equation.mapping.get_stoichiometry_coefficients() ) self._populate_ce() self.chemical_equation.disconnection = create_disconnection( self.chemical_equation, self.desired_product ) return self.chemical_equation def _populate_ce(self) -> None: use_reagents = self.chemical_equation_identity_name not in ["r_p", "u_r_p"] use_mapping = self.chemical_equation.mapping is not None self.chemical_equation.rdrxn = self._build_rdrxn( use_reagents=use_reagents, use_mapping=use_mapping ) self.chemical_equation.smiles = self._get_reaction_smiles(use_mapping) self.chemical_equation.hash_map = calculate_reaction_like_hash_map( self.chemical_equation.catalog, self.chemical_equation.role_map ) self.chemical_equation.uid = self.chemical_equation.hash_map.get( self.chemical_equation_identity_name ) def _build_rdrxn( self, use_reagents: bool, use_mapping: bool ) -> cif.rdChemReactions.ChemicalReaction: """Build an RDKit ChemicalReaction object.""" return cif.build_rdrxn( catalog=self.chemical_equation.catalog, role_map=self.chemical_equation.role_map, stoichiometry_coefficients=self.chemical_equation.stoichiometry_coefficients, use_reagents=use_reagents, use_smiles=False, use_atom_mapping=use_mapping, mapping=self.chemical_equation.mapping, ) def _get_reaction_smiles(self, use_mapping: bool) -> str: """Get the SMILES representation of the reaction.""" return cif.rdrxn_to_string( rdrxn=self.chemical_equation.rdrxn, out_fmt="smiles", use_atom_mapping=use_mapping, )