RDKit
Open-source cheminformatics and machine learning.
RDDepictor.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2003-2022 Greg Landrum and other RDKit contributors
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RDDEPICTOR_H
13 #define RDDEPICTOR_H
14 
16 #include <RDGeneral/types.h>
17 #include <Geometry/point.h>
18 #include <boost/smart_ptr.hpp>
19 
20 namespace RDKit {
21 class ROMol;
22 }
23 
24 namespace RDDepict {
25 
26 RDKIT_DEPICTOR_EXPORT extern bool
27  preferCoordGen; // Ignored if coordgen support isn't active
28 
29 typedef boost::shared_array<double> DOUBLE_SMART_PTR;
30 
//! \brief Exception thrown by the depiction code
/*!
  Used, for example, when ring system templates are invalid or when a
  constrained depiction cannot be generated and failure is not accepted.

  The exception owns a copy of the message supplied at construction; what()
  returns a pointer into that copy, which stays valid for the lifetime of the
  exception object.
*/
class RDKIT_DEPICTOR_EXPORT DepictException : public std::exception {
 public:
  //! construct from a C string; a null pointer yields an empty message
  //! (building a std::string from a null pointer is undefined behaviour)
  DepictException(const char *msg) : _msg(msg ? msg : "") {}
  //! construct from a std::string; taken by const reference so the message
  //! is copied exactly once (into the stored member)
  DepictException(const std::string &msg) : _msg(msg) {}
  //! \return the stored message as a null-terminated string
  const char *what() const noexcept override { return _msg.c_str(); }
  ~DepictException() noexcept override = default;

 private:
  std::string _msg;  //!< owned copy of the error message
};
41 
42 //! \brief Set the path to the file containing the ring system templates
43 /*!
44 
45  \param templatePath the file path to a file containing the ring system
46  templates. Each template must be a single line in the file represented using
47  CXSMILES, and the structure should be a single ring system.
48 
49  \throws DepictException if any of the templates are invalid
50 */
52 setRingSystemTemplates(const std::string templatePath);
53 
54 //! \brief Add ring system templates to be used in 2D coordinate generation.
55 /// If there are duplicates, the most recently added template will be used.
56 /*!
57 
58  \param templatePath the file path to a file containing the ring system
59  templates. Each template must be a single line in the file represented using
60  CXSMILES, and the structure should be a single ring system.
61 
62  \throws DepictException if any of the templates are invalid
63 */
65 addRingSystemTemplates(const std::string templatePath);
66 
67 //! \brief Load default ring system templates to be used in 2D coordinate
68 //! generation
70 
72  const RDGeom::INT_POINT2D_MAP *coordMap =
73  nullptr; //!< a map of int to Point2D, between atom IDs and their
74  //!< locations. This is the container the user needs to
75  //!< fill if he/she wants to specify coordinates for a portion
76  //!< of the molecule, defaults to 0
77  bool canonOrient = false; //!< canonicalize the orientation so that the long
78  //!< axes align with the x-axis etc.
79  bool clearConfs = true; //!< clear all existing conformations on the molecule
80  //!< before adding the 2D coordinates instead of
81  //!< simply adding to the list
82  unsigned int nFlipsPerSample = 0; //!< the number of rotatable bonds that are
83  //!< flipped at random for each sample
84  unsigned int nSamples = 0; //!< the number of samples
85  int sampleSeed = 0; //!< seed for the random sampling process
86  bool permuteDeg4Nodes = false; //!< try permuting the drawing order of bonds
87  //!< around atoms with four neighbors in order
88  //!< to improve the depiction
89  bool forceRDKit = false; //!< use RDKit to generate coordinates even if
90  //!< preferCoordGen is set to true
91  bool useRingTemplates = false; //!< whether to use ring system templates for
92  //!< generating initial coordinates
93 
95 };
96 
97 //! \brief Generate 2D coordinates (a depiction) for a molecule
98 /*!
99 
100  \param mol the molecule we are interested in
101 
102  \param params parameters used for 2D coordinate generation
103 
104  \return ID of the conformation added to the molecule containing the
105  2D coordinates
106 
107 */
109  RDKit::ROMol &mol, const Compute2DCoordParameters &params);
110 
111 //! \brief Generate 2D coordinates (a depiction) for a molecule
112 /*!
113 
114  \param mol the molecule we are interested in
115 
116  \param coordMap a map of int to Point2D, between atom IDs and
117  their locations. This is the container the user needs to fill if
118  he/she wants to specify coordinates for a portion of the molecule,
119  defaults to 0
120 
121  \param canonOrient canonicalize the orientation so that the long
122  axes align with the x-axis etc.
123 
124  \param clearConfs clear all existing conformations on the molecule
125  before adding the 2D coordinates instead of simply adding to the
126  list
127 
128  \param nFlipsPerSample - the number of rotatable bonds that are
129  flipped at random for each sample
130 
131  \param nSamples - the number of samples
132 
133  \param sampleSeed - seed for the random sampling process
134 
135  \param permuteDeg4Nodes - try permuting the drawing order of bonds around
136  atoms with four neighbors in order to improve the depiction
137 
138  \param forceRDKit - use RDKit to generate coordinates even if
139  preferCoordGen is set to true
140 
141  \param useRingTemplates whether to use ring system templates for generating
142  initial coordinates
143 
144  \return ID of the conformation added to the molecule containing the
145  2D coordinates
146 
147 */
149  RDKit::ROMol &mol, const RDGeom::INT_POINT2D_MAP *coordMap = nullptr,
150  bool canonOrient = false, bool clearConfs = true,
151  unsigned int nFlipsPerSample = 0, unsigned int nSamples = 0,
152  int sampleSeed = 0, bool permuteDeg4Nodes = false, bool forceRDKit = false,
153  bool useRingTemplates = false);
154 
155 //! \brief Compute the 2D coordinates such that the interatom distances
156 /// mimic those in a distance matrix
157 /*!
158 
159  This function generates 2D coordinates such that the inter-atom
160  distances mimic those specified via dmat. This is done by randomly
161  sampling(flipping) the rotatable bonds in the molecule and
162  evaluating a cost function which contains two components. The
163  first component is the sum of inverse of the squared inter-atom
164  distances, this helps in spreading the atoms far from each
165  other. The second component is the sum of squares of the
166  difference in distance between those in dmat and the generated
167  structure. The user can adjust the relative importance of the two
168  components via an adjustable parameter (see below)
169 
170  ARGUMENTS:
171 
172  \param mol - molecule to generate coordinates for
173 
174  \param dmat - the distance matrix we want to mimic, this is a
175  symmetric N by N matrix where N is the number of atoms in mol. All
176  negative entries in dmat are ignored.
177 
178  \param canonOrient - canonicalize the orientation after the 2D
179  embedding is done
180 
181  \param clearConfs - clear any previously existing conformations on
182  mol before adding a conformation
183 
184  \param weightDistMat - A value between 0.0 and 1.0, this
185  determines the importance of mimicking the inter-atom
186  distances in dmat. (1.0 - weightDistMat) is the weight associated
187  to spreading out the structure (density) in the cost function
188 
189  \param nFlipsPerSample - the number of rotatable bonds that are
190  flipped at random for each sample
191 
192  \param nSamples - the number of samples
193 
194  \param sampleSeed - seed for the random sampling process
195 
196  \param permuteDeg4Nodes - try permuting the drawing order of bonds around
197  atoms with four neighbors in order to improve the depiction
198 
199  \param forceRDKit - use RDKit to generate coordinates even if
200  preferCoordGen is set to true
201 
202  \return ID of the conformation added to the molecule containing the
203  2D coordinates
204 
205 
206 */
208  RDKit::ROMol &mol, const DOUBLE_SMART_PTR *dmat = nullptr,
209  bool canonOrient = true, bool clearConfs = true, double weightDistMat = 0.5,
210  unsigned int nFlipsPerSample = 3, unsigned int nSamples = 100,
211  int sampleSeed = 25, bool permuteDeg4Nodes = true, bool forceRDKit = false);
212 
213 //! \brief Compute 2D coordinates where a piece of the molecule is
214 /// constrained to have the same coordinates as a reference.
215 /*!
216  This function generates a depiction for a molecule where a piece of the
217  molecule is constrained to have the same coordinates as a reference.
218 
219  This is useful for, for example, generating depictions of SAR data
220  sets so that the cores of the molecules are all oriented the same way.
221 
222  ARGUMENTS:
223 
224  \param mol - the molecule to be aligned, this will come back
225  with a single conformer.
226  \param reference - a molecule with the reference atoms to align to;
227  this should have a depiction.
228  \param confId - (optional) the id of the reference conformation to use
229  \param referencePattern - (optional) a query molecule to be used to
230  generate the atom mapping between the molecule
231  and the reference.
232  \param acceptFailure - (optional) if true, standard depictions will be
233  generated for molecules that don't have a substructure
234  match to the reference; if false, throws a
235  DepictException.
236  \param forceRDKit - (optional) use RDKit to generate coordinates even if
237  preferCoordGen is set to true
238  \param allowOptionalAttachments - (optional) if true, terminal dummy atoms in
239  the reference are ignored if they match an implicit
240  hydrogen in the molecule, and a constrained
241  depiction is still attempted
242  RETURNS:
243 
244  \return MatchVectType with (queryAtomidx, molAtomIdx) pairs used for
245  the constrained depiction
246 */
248  RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId = -1,
249  const RDKit::ROMol *referencePattern =
250  static_cast<const RDKit::ROMol *>(nullptr),
251  bool acceptFailure = false, bool forceRDKit = false,
252  bool allowOptionalAttachments = false);
253 
254 //! \brief Compute 2D coordinates where a piece of the molecule is
255 /// constrained to have the same coordinates as a reference.
256 /*!
257  This function generates a depiction for a molecule where a piece of the
258  molecule is constrained to have the same coordinates as a reference.
259 
260  This is useful for, for example, generating depictions of SAR data
261  sets so that the cores of the molecules are all oriented the same way.
262  This overload allows specifying the (referenceAtom, molAtom) index pairs
263  which should be matched as MatchVectType. Please note that the
264  vector can be shorter than the number of atoms in the reference.
265 
266  ARGUMENTS:
267 
268  \param mol - the molecule to be aligned, this will come back
269  with a single conformer.
270  \param reference - a molecule with the reference atoms to align to;
271  this should have a depiction.
272  \param refMatchVect - a MatchVectType that will be used to
273  generate the atom mapping between the molecule
274  and the reference.
275  \param confId - (optional) the id of the reference conformation to use
276  \param forceRDKit - (optional) use RDKit to generate coordinates even if
277  preferCoordGen is set to true
278 */
280  RDKit::ROMol &mol, const RDKit::ROMol &reference,
281  const RDKit::MatchVectType &refMatchVect, int confId = -1,
282  bool forceRDKit = false);
283 
284 //! \brief Generate a 2D depiction for a molecule where all or part of
285 /// it mimics the coordinates of a 3D reference structure.
286 /*!
287  Generates a depiction for a molecule where a piece of the molecule
288  is constrained to have coordinates similar to those of a 3D reference
289  structure.
290 
291  ARGUMENTS:
292  \param mol - the molecule to be aligned, this will come back
293  with a single conformer containing 2D coordinates
294  \param reference - a molecule with the reference atoms to align to.
295  By default this should be the same as mol, but with
296  3D coordinates
297  \param confId - (optional) the id of the reference conformation to use
298  \param referencePattern - (optional) a query molecule to map a subset of
299  the reference onto the mol, so that only some of the
300  atoms are aligned.
301  \param acceptFailure - (optional) if true, standard depictions will be
302  generated
303  for molecules that don't match the reference or the
304  referencePattern; if false, throws a DepictException.
305  \param forceRDKit - (optional) use RDKit to generate coordinates even if
306  preferCoordGen is set to true
307 */
309  RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId = -1,
310  RDKit::ROMol *referencePattern = nullptr, bool acceptFailure = false,
311  bool forceRDKit = false);
312 
313 //! \brief Rotate the 2D depiction such that the majority of bonds have an
314 //! angle with the X axis which is a multiple of 30 degrees.
315 /*!
316 
317  ARGUMENTS:
318  \param mol - the molecule to be rotated
319  \param confId - (optional) the id of the reference conformation to use
320  \param minimizeRotation - (optional) if false (the default), the molecule
321  is rotated such that the majority of bonds have an angle with the
322  X axis of 30 or 90 degrees. If true, the minimum rotation is applied
323  such that the majority of bonds have an angle with the X axis of
324  0, 30, 60, or 90 degrees, with the goal of altering the initial
325  orientation as little as possible.
326 */
327 
329  int confId = -1,
330  bool minimizeRotation = false);
331 
332 //! \brief Normalizes the 2D depiction.
333 /*!
334  If canonicalize is != 0, the depiction is subjected to a canonical
335  transformation such that its main axis is aligned along the X axis
336  (canonicalize >0, the default) or the Y axis (canonicalize <0).
337  If canonicalize is 0, no canonicalization takes place.
338  If scaleFactor is <0.0 (the default) the depiction is scaled such
339  that bond lengths conform to RDKit standards. The applied scaling
340  factor is returned.
341 
342  ARGUMENTS:
343  \param mol - the molecule to be normalized
344  \param confId - (optional) the id of the reference conformation to use
345  \param canonicalize - (optional) if != 0, a canonical transformation is
346  applied: if >0 (the default), the main molecule axis is
347  aligned to the X axis, if <0 to the Y axis.
348  If 0, no canonical transformation is applied.
349  \param scaleFactor - (optional) if >0.0, the scaling factor to apply. The
350  default (-1.0) means that the depiction is automatically
351  scaled such that bond lengths are the standard RDKit
352  ones.
353  RETURNS:
354 
355  \return the applied scaling factor.
356 */
357 
359  int confId = -1,
360  int canonicalize = 1,
361  double scaleFactor = -1.0);
362 }; // namespace RDDepict
363 
364 #endif
const char * what() const noexcept override
Definition: RDDepictor.h:35
DepictException(const char *msg)
Definition: RDDepictor.h:33
~DepictException() noexcept override=default
DepictException(const std::string msg)
Definition: RDDepictor.h:34
#define RDKIT_DEPICTOR_EXPORT
Definition: export.h:89
boost::shared_array< double > DOUBLE_SMART_PTR
Definition: EmbeddedFrag.h:26
RDKIT_DEPICTOR_EXPORT void generateDepictionMatching3DStructure(RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId=-1, RDKit::ROMol *referencePattern=nullptr, bool acceptFailure=false, bool forceRDKit=false)
Generate a 2D depiction for a molecule where all or part of it mimics the coordinates of a 3D referen...
void RDKIT_DEPICTOR_EXPORT loadDefaultRingSystemTemplates()
Load default ring system templates to be used in 2D coordinate generation.
RDKIT_DEPICTOR_EXPORT double normalizeDepiction(RDKit::ROMol &mol, int confId=-1, int canonicalize=1, double scaleFactor=-1.0)
Normalizes the 2D depiction.
void RDKIT_DEPICTOR_EXPORT addRingSystemTemplates(const std::string templatePath)
Add ring system templates to be used in 2D coordinater generation. If there are duplicates,...
RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoords(RDKit::ROMol &mol, const Compute2DCoordParameters &params)
Generate 2D coordinates (a depiction) for a molecule.
RDKIT_DEPICTOR_EXPORT void straightenDepiction(RDKit::ROMol &mol, int confId=-1, bool minimizeRotation=false)
Rotate the 2D depiction such that the majority of bonds have an angle with the X axis which is a mult...
void RDKIT_DEPICTOR_EXPORT setRingSystemTemplates(const std::string templatePath)
Set the path to the file containing the ring system templates.
RDKIT_DEPICTOR_EXPORT RDKit::MatchVectType generateDepictionMatching2DStructure(RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId=-1, const RDKit::ROMol *referencePattern=static_cast< const RDKit::ROMol * >(nullptr), bool acceptFailure=false, bool forceRDKit=false, bool allowOptionalAttachments=false)
Compute 2D coordinates where a piece of the molecule is constrained to have the same coordinates as a...
RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoordsMimicDistMat(RDKit::ROMol &mol, const DOUBLE_SMART_PTR *dmat=nullptr, bool canonOrient=true, bool clearConfs=true, double weightDistMat=0.5, unsigned int nFlipsPerSample=3, unsigned int nSamples=100, int sampleSeed=25, bool permuteDeg4Nodes=true, bool forceRDKit=false)
Compute the 2D coordinates such the interatom distances mimic those in a distance matrix.
RDKIT_DEPICTOR_EXPORT bool preferCoordGen
std::map< int, Point2D > INT_POINT2D_MAP
Definition: point.h:550
Std stuff.
Definition: Abbreviations.h:19
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)