RDKit
Open-source cheminformatics and machine learning.
MolStandardize.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018-2021 Susan H. Leung and other RDKit contributors
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 /*! \file MolStandardize.h
11 
12  \brief Defines the CleanupParameters and some convenience functions.
13 
14 */
15 #include <RDGeneral/export.h>
16 #ifndef RD_MOLSTANDARDIZE_H
17 #define RD_MOLSTANDARDIZE_H
18 
19 #include <string>
20 #include <GraphMol/RDKitBase.h>
22 
23 namespace RDKit {
24 class RWMol;
25 class ROMol;
26 
27 namespace MolStandardize {
28 
29 //! The CleanupParameters structure defines the default parameters for the
30 /// cleanup process and also allows the user to customize the process by
31 /// changing the parameters.
32 /*!
33 
34  <b>Notes:</b>
35  - To customize the parameters, the structure must be initialized first.
36  (Another on the TODO list)
37  - For this project, not all the parameters have been revealed.
38  (TODO)
39 
40 */
42  // TODO reveal all parameters
43  private:
44  const char *rdbase_cstr = std::getenv("RDBASE");
45 
46  public:
47  std::string rdbase = rdbase_cstr != nullptr ? rdbase_cstr : "";
48  std::string normalizations;
49  std::string acidbaseFile;
50  std::string fragmentFile;
51  std::string tautomerTransforms;
52  int maxRestarts{200}; //!< The maximum number of times to attempt to apply
53  //!< the series of normalizations (default 200).
54  bool preferOrganic{false}; //!< Whether to prioritize organic fragments when
55  //!< choosing fragment parent (default False).
56  bool doCanonical{true}; //!< Whether to apply normalizations in a
57  //!< canonical order
58  int maxTautomers{1000}; //!< The maximum number of tautomers to enumerate
59  //!< (default 1000).
60  int maxTransforms{1000}; //!< The maximum number of tautomer
61  //!< transformations to apply (default 1000).
62  bool tautomerRemoveSp3Stereo{
63  true}; //!< Whether to remove stereochemistry from sp3 centers involved
64  //!< in tautomerism (defaults to true)
65  bool tautomerRemoveBondStereo{
66  true}; //!< Whether to remove stereochemistry from double bonds involved
67  //!< in tautomerism (defaults to true)
68  bool tautomerRemoveIsotopicHs{
69  true}; //!< Whether to remove isotopic Hs from centers involved in
70  //!< tautomerism (defaults to true)
71  bool tautomerReassignStereo{
72  true}; //!< Whether enumerate() should call assignStereochemistry on all
73  //!< generated tautomers (defaults to true)
74  bool largestFragmentChooserUseAtomCount{
75  true}; //!< Whether LargestFragmentChooser should use atom count as main
76  //!< criterion before MW (defaults to true)
77  bool largestFragmentChooserCountHeavyAtomsOnly{
78  false}; //!< Whether LargestFragmentChooser should only count heavy atoms
79  //!< (defaults to false)
80  std::vector<std::pair<std::string, std::string>> normalizationData;
81  std::vector<std::pair<std::string, std::string>> fragmentData;
82  std::vector<std::tuple<std::string, std::string, std::string>> acidbaseData;
83  std::vector<std::tuple<std::string, std::string, std::string, std::string>>
86 };
87 
88 RDKIT_MOLSTANDARDIZE_EXPORT extern const CleanupParameters
90 
92  CleanupParameters &params, const std::string &json);
93 
94 //! The cleanup function is equivalent to the
95 /// molvs.Standardizer().standardize(mol) function. It calls the same steps,
96 /// namely: RemoveHs, RDKit SanitizeMol, MetalDisconnector, Normalizer,
97 /// Reionizer, RDKit AssignStereochemistry.
99  const RWMol *mol,
101 //! \overload
102 inline RWMol *cleanup(const RWMol &mol, const CleanupParameters &params =
104  return cleanup(&mol, params);
105 };
106 
107 //! Works the same as Normalizer().normalize(mol)
109  const RWMol *mol,
111 
112 //! Works the same as Reionizer().reionize(mol)
114  const RWMol *mol,
116 
117 //! Works the same as FragmentRemover().remove(mol)
119  const RWMol *mol,
121 
122 //! Works the same as TautomerEnumerator().canonicalize(mol)
124  const RWMol *mol,
126 
127 //! Returns the tautomer parent of a given molecule. The tautomer parent is the
128 /// standardized canonical tautomer of the molecule.
130  const RWMol &mol,
132  bool skipStandardize = false);
133 
134 //! Returns the fragment parent of a given molecule. The fragment parent is the
135 /// largest organic covalent unit in the molecule.
137  const RWMol &mol,
139  bool skip_standardize = false);
140 
141 //! calls removeStereochemistry() on the given molecule
143  const RWMol &mol,
145  bool skip_standardize = false);
146 
147 //! removes all isotope specifications from the given molecule
149  const RWMol &mol,
151  bool skip_standardize = false);
152 
153 //! Returns the charge parent of a given molecule. The charge parent is the
154 //! uncharged version of the fragment parent.
156  const RWMol &mol,
158  bool skip_standardize = false);
159 
160 //! Returns the super parent. The super parent is the fragment, charge, isotope,
161 //! stereo, and tautomer parent of the molecule.
163  const RWMol &mol,
165  bool skip_standardize = false);
166 
167 //! Convenience function for quickly standardizing a single SMILES string.
168 /// Returns a standardized canonical SMILES string given a SMILES string.
169 /// This is the equivalent of calling cleanup() on the molecule parsed from the SMILES string.
171  const std::string &smiles);
172 
173 //! Do a disconnection of an organometallic complex according to rules
174 //! preferred by Syngenta. All bonds to metals are broken, including
175 //! covalent bonds to Group I/II metals (so including Grignards, lithium
176 //! complexes etc.). The ligands are left in the charge states they came
177 //! in with. If there are haptic bonds defined by a dummy atom bonded to
178 //! a metal by a bond that has a _MolFileBondEndPts (which will contain the
179 //! indices of the atoms involved in the haptic bond) then the dummy atom
180 //! is removed also.
181 //! Do the disconnection in place.
182 //! The options are splitGrignards, splitAromaticC, adjustCharges and
183 //! removeHapticDummies. Roll on C++20 and designated initializers!
186  true, true, false, true});
187 //! As above, but returns new disconnected molecule.
190  true, true, false, true});
191 
192 //! TODO
194  const std::string &smiles,
196 }; // namespace MolStandardize
197 } // namespace RDKit
198 #endif
pulls in the core RDKit functionality
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
#define RDKIT_MOLSTANDARDIZE_EXPORT
Definition: export.h:321
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * removeFragments(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as FragmentRemover().remove(mol)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * tautomerParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skipStandardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * canonicalTautomer(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as TautomerEnumerator().canonicalize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT std::vector< std::string > enumerateTautomerSmiles(const std::string &smiles, const CleanupParameters &params=defaultCleanupParameters)
TODO.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * cleanup(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * stereoParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
calls removeStereochemistry() on the given molecule
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * fragmentParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * reionize(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as Reionizer().reionize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * chargeParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT const CleanupParameters defaultCleanupParameters
Definition: Fragment.h:25
RDKIT_MOLSTANDARDIZE_EXPORT void disconnectOrganometallics(RWMol &mol, RDKit::MolStandardize::MetalDisconnectorOptions mdo={ true, true, false, true})
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * isotopeParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
removes all isotope specifications from the given molecule
RDKIT_MOLSTANDARDIZE_EXPORT std::string standardizeSmiles(const std::string &smiles)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * superParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * normalize(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as Normalizer().normalize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT void updateCleanupParamsFromJSON(CleanupParameters &params, const std::string &json)
Std stuff.
Definition: Abbreviations.h:19
std::vector< std::tuple< std::string, std::string, std::string, std::string > > tautomerTransformData
std::vector< std::tuple< std::string, std::string, std::string > > acidbaseData
std::vector< std::pair< std::string, std::string > > fragmentData
std::vector< std::pair< std::string, std::string > > normalizationData