RDKit
Open-source cheminformatics and machine learning.
MorganGenerator.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RD_MORGANGEN_H_2018_07
13 #define RD_MORGANGEN_H_2018_07
14 
16 #include <cstdint>
17 
18 namespace RDKit {
19 
20 namespace MorganFingerprint {
21 
22 /**
23  \brief Default atom invariants generator for Morgan fingerprint, generates
24  ECFP-type invariants
25 
26  */
28  : public AtomInvariantsGenerator {
29  const bool df_includeRingMembership;
30 
31  public:
32  /**
33  \brief Construct a new MorganAtomInvGenerator object
34 
35  \param includeRingMembership : if set, whether or not the atom is in a ring
36  will be used in the invariant list.
37  */
38  MorganAtomInvGenerator(const bool includeRingMembership = true);
39 
40  std::vector<std::uint32_t> *getAtomInvariants(
41  const ROMol &mol) const override;
42 
43  std::string infoString() const override;
44  MorganAtomInvGenerator *clone() const override;
45 };
46 
47 /**
48  \brief Alternative atom invariants generator for Morgan fingerprint, generates
49  FCFP-type invariants
50 
51  */
53  : public AtomInvariantsGenerator {
54  std::vector<const ROMol *> *dp_patterns;
55 
56  public:
57  /**
58  \brief Construct a new MorganFeatureAtomInvGenerator object
59 
60  \param patterns : if provided should contain the queries used to assign
61  atom-types. If not provided, feature definitions adapted from reference:
62  Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998) will be used for
63  Donor, Acceptor, Aromatic, Halogen, Basic, Acidic.
64  */
65  MorganFeatureAtomInvGenerator(std::vector<const ROMol *> *patterns = nullptr);
66 
67  std::vector<std::uint32_t> *getAtomInvariants(
68  const ROMol &mol) const override;
69 
70  std::string infoString() const override;
72 };
73 
74 /**
75  \brief Bond invariants generator for Morgan fingerprint
76 
77  */
79  : public BondInvariantsGenerator {
80  const bool df_useBondTypes;
81  const bool df_useChirality;
82 
83  public:
84  /**
85  \brief Construct a new MorganBondInvGenerator object
86 
87  \param useBondTypes : if set, bond types will be included as a part of the
88  bond invariants
89  \param useChirality : if set, chirality information will be included as a
90  part of the bond invariants
91  */
92  MorganBondInvGenerator(const bool useBondTypes = true,
93  const bool useChirality = false);
94 
95  std::vector<std::uint32_t> *getBondInvariants(
96  const ROMol &mol) const override;
97 
98  std::string infoString() const override;
99  MorganBondInvGenerator *clone() const override;
100  ~MorganBondInvGenerator() override = default;
101 };
102 
103 /**
104  \brief Class for holding Morgan fingerprint specific arguments
105 
106  */
108  public:
109  bool df_onlyNonzeroInvariants = false;
110  unsigned int d_radius = 3;
111  bool df_includeRedundantEnvironments = false;
112 
113  std::string infoString() const override;
114 
115  /**
116  \brief Construct a new MorganArguments object
117 
118  \param radius the number of iterations to grow the fingerprint
119  \param countSimulation if set, use count simulation while generating the
120  fingerprint
121  \param includeChirality if set, chirality information will be added to the
122  generated bit id, independently from bond invariants
123  \param onlyNonzeroInvariants if set, bits will only be set from atoms that
124  have a nonzero invariant
125  \param countBounds boundaries for count simulation, corresponding bit will
126  be set if the count is higher than the number provided for that spot
127  \param fpSize size of the generated fingerprint, does not affect the sparse
128  versions
129  \param includeRedundantEnvironments if set redundant environments will be
130  included in the fingerprint
131  */
132  MorganArguments(unsigned int radius, bool countSimulation = false,
133  bool includeChirality = false,
134  bool onlyNonzeroInvariants = false,
135  std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
136  std::uint32_t fpSize = 2048,
137  bool includeRedundantEnvironments = false)
138  : FingerprintArguments(countSimulation, countBounds, fpSize, 1,
139  includeChirality),
140  df_onlyNonzeroInvariants(onlyNonzeroInvariants),
141  d_radius(radius),
142  df_includeRedundantEnvironments(includeRedundantEnvironments){};
143 };
144 
145 /**
146  \brief Class for holding the bit-id created from Morgan fingerprint
147  environments and the additional data necessary for extra outputs
148 
149  */
150 template <typename OutputType>
152  : public AtomEnvironment<OutputType> {
153  const OutputType d_code;
154  const unsigned int d_atomId;
155  const unsigned int d_layer;
156 
157  public:
158  OutputType getBitId(
159  FingerprintArguments *arguments, // unused
160  const std::vector<std::uint32_t> *atomInvariants, // unused
161  const std::vector<std::uint32_t> *bondInvariants, // unused
162  AdditionalOutput *additionalOutput, // unused
163  const bool hashResults = false, // unused
164  const std::uint64_t fpSize = 0 // unused
165  ) const override;
167  size_t bitId) const override;
168 
169  /**
170  \brief Construct a new MorganAtomEnv object
171 
172  \param code bit id generated from this environment
173  \param atomId atom id of the atom at the center of this environment
174  \param layer radius of this environment
175  */
176  MorganAtomEnv(const std::uint32_t code, const unsigned int atomId,
177  const unsigned int layer);
178 };
179 
180 /**
181  \brief Class that generates atom environments for Morgan fingerprint
182 
183  */
184 template <typename OutputType>
186  : public AtomEnvironmentGenerator<OutputType> {
187  public:
188  std::vector<AtomEnvironment<OutputType> *> getEnvironments(
189  const ROMol &mol, FingerprintArguments *arguments,
190  const std::vector<std::uint32_t> *fromAtoms,
191  const std::vector<std::uint32_t> *ignoreAtoms, const int confId,
192  const AdditionalOutput *additionalOutput,
193  const std::vector<std::uint32_t> *atomInvariants,
194  const std::vector<std::uint32_t> *bondInvariants,
195  const bool hashResults = false) const override;
196 
197  std::string infoString() const override;
198  OutputType getResultSize() const override;
199 };
200 
201 /**
202  \brief Get a fingerprint generator for Morgan fingerprint
203 
204  \tparam OutputType determines the size of the bitIds and the result, can be 32
205  or 64 bit unsigned integer
206 
207  \param radius the number of iterations to grow the fingerprint
208 
209  \param countSimulation if set, use count simulation while generating the
210  fingerprint
211 
212  \param includeChirality if set, chirality information will be added to the
213  generated bit id, independently from bond invariants
214 
215  \param onlyNonzeroInvariants if set, bits will only be set from atoms that
216  have a nonzero invariant
217 
218  \param countBounds boundaries for count simulation, corresponding bit will be
219  set if the count is higher than the number provided for that spot
220 
221  \param fpSize size of the generated fingerprint, does not affect the sparse
222  versions
223  \param countSimulation if set, use count simulation while generating the
224  fingerprint
225  \param includeChirality sets includeChirality flag for both MorganArguments
226  and the default bond generator MorganBondInvGenerator
227  \param useBondTypes if set, bond types will be included as a part of the
228  default bond invariants
229  \param onlyNonzeroInvariants if set, bits will only be set from atoms that
230  have a nonzero invariant
231  \param includeRedundantEnvironments if set redundant environments will be
232  included in the fingerprint
233  \param atomInvariantsGenerator custom atom invariants generator to use
234  \param bondInvariantsGenerator custom bond invariants generator to use
235  \param ownsAtomInvGen if set atom invariants generator is destroyed with the
236  fingerprint generator
237  \param ownsBondInvGen if set bond invariants generator is destroyed with the
238  fingerprint generator
239 
240  \return FingerprintGenerator<OutputType>* that generates Morgan fingerprints
241 
242 This generator supports the following \c AdditionalOutput types:
243  - \c atomToBits : which bits each atom is the central atom for
244  - \c atomCounts : how many bits each atom sets
245  - \c bitInfoMap : map from bitId to (atomId, radius) pairs
246 
247  */
248 template <typename OutputType>
250  unsigned int radius, bool countSimulation, bool includeChirality,
251  bool useBondTypes, bool onlyNonzeroInvariants,
252  bool includeRedundantEnvironments,
253  AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
254  BondInvariantsGenerator *bondInvariantsGenerator = nullptr,
255  std::uint32_t fpSize = 2048,
256  std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
257  bool ownsAtomInvGen = false, bool ownsBondInvGen = false);
258 
259 /**
260  \brief Get a fingerprint generator for Morgan fingerprint
261 
262  \tparam OutputType determines the size of the bitIds and the result, can be 32
263  or 64 bit unsigned integer
264 
265  \param radius the number of iterations to grow the fingerprint
266 
267  \param countSimulation if set, use count simulation while generating the
268  fingerprint
269 
270  \param includeChirality if set, chirality information will be added to the
271  generated bit id, independently from bond invariants
272 
273  \param onlyNonzeroInvariants if set, bits will only be set from atoms that
274  have a nonzero invariant
275 
276  \param countBounds boundaries for count simulation, corresponding bit will be
277  set if the count is higher than the number provided for that spot
278 
279  \param fpSize size of the generated fingerprint, does not affect the sparse
280  versions
281  \param countSimulation if set, use count simulation while generating the
282  fingerprint
283  \param includeChirality sets includeChirality flag for both MorganArguments
284  and the default bond generator MorganBondInvGenerator
285  \param useBondTypes if set, bond types will be included as a part of the
286  default bond invariants
287  \param onlyNonzeroInvariants if set, bits will only be set from atoms that
288  have a nonzero invariant
289  \param atomInvariantsGenerator custom atom invariants generator to use
290  \param bondInvariantsGenerator custom bond invariants generator to use
291  \param ownsAtomInvGen if set atom invariants generator is destroyed with the
292  fingerprint generator
293  \param ownsBondInvGen if set bond invariants generator is destroyed with the
294  fingerprint generator
295 
296  \return FingerprintGenerator<OutputType>* that generates Morgan fingerprints
297 
298 This generator supports the following \c AdditionalOutput types:
299  - \c atomToBits : which bits each atom is the central atom for
300  - \c atomCounts : how many bits each atom sets
301  - \c bitInfoMap : map from bitId to (atomId, radius) pairs
302 
303  */
304 template <typename OutputType>
306  unsigned int radius, bool countSimulation = false,
307  bool includeChirality = false, bool useBondTypes = true,
308  bool onlyNonzeroInvariants = false,
309  AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
310  BondInvariantsGenerator *bondInvariantsGenerator = nullptr,
311  std::uint32_t fpSize = 2048,
312  std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
313  bool ownsAtomInvGen = false, bool ownsBondInvGen = false) {
314  return getMorganGenerator<OutputType>(
315  radius, countSimulation, includeChirality, useBondTypes,
316  onlyNonzeroInvariants, false, atomInvariantsGenerator,
317  bondInvariantsGenerator, fpSize, countBounds, ownsAtomInvGen,
318  ownsBondInvGen);
319 };
320 
321 } // namespace MorganFingerprint
322 } // namespace RDKit
323 
324 #endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
abstract base class for bond invariants generators
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
class that generates same fingerprint style for different output formats
Class for holding Morgan fingerprint specific arguments.
MorganArguments(unsigned int radius, bool countSimulation=false, bool includeChirality=false, bool onlyNonzeroInvariants=false, std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, std::uint32_t fpSize=2048, bool includeRedundantEnvironments=false)
Construct a new MorganArguments object.
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
Class for holding the bit-id created from Morgan fingerprint environments and the additional data nec...
void updateAdditionalOutput(AdditionalOutput *output, size_t bitId) const override
MorganAtomEnv(const std::uint32_t code, const unsigned int atomId, const unsigned int layer)
Construct a new MorganAtomEnv object.
OutputType getBitId(FingerprintArguments *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, AdditionalOutput *additionalOutput, const bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
Default atom invariants generator for Morgan fingerprint, generates ECFP-type invariants.
MorganAtomInvGenerator(const bool includeRingMembership=true)
Construct a new MorganAtomInvGenerator object.
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
MorganAtomInvGenerator * clone() const override
Bond invariants generator for Morgan fingerprint.
std::string infoString() const override
method that returns information about this BondInvariantsGenerator and its arguments
MorganBondInvGenerator * clone() const override
MorganBondInvGenerator(const bool useBondTypes=true, const bool useChirality=false)
Construct a new MorganBondInvGenerator object.
std::vector< std::uint32_t > * getBondInvariants(const ROMol &mol) const override
get bond invariants from a molecule
Class that generates atom environments for Morgan fingerprint.
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const bool hashResults=false) const override
generate and return all atom-environments from a molecule
std::string infoString() const override
method that returns information about this AtomEnvironmentGenerator and its arguments if any
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
Alternative atom invariants generator for Morgan fingerprint, generate FCFP-type invariants.
MorganFeatureAtomInvGenerator * clone() const override
MorganFeatureAtomInvGenerator(std::vector< const ROMol * > *patterns=nullptr)
Construct a new MorganFeatureAtomInvGenerator object.
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:177
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getMorganGenerator(unsigned int radius, bool countSimulation, bool includeChirality, bool useBondTypes, bool onlyNonzeroInvariants, bool includeRedundantEnvironments, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, BondInvariantsGenerator *bondInvariantsGenerator=nullptr, std::uint32_t fpSize=2048, std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, bool ownsAtomInvGen=false, bool ownsBondInvGen=false)
Get a fingerprint generator for Morgan fingerprint.
Std stuff.
Definition: Abbreviations.h:19