RDKit
Open-source cheminformatics and machine learning.
AtomPairGenerator.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RD_ATOMPAIRGEN_H_2018_06
13 #define RD_ATOMPAIRGEN_H_2018_06
14 
17 
18 namespace RDKit {
19 namespace AtomPair {
20 using namespace AtomPairs;
21 
23  : public AtomInvariantsGenerator {  // body of AtomPairAtomInvGenerator; the line carrying the class name is missing from this listing
24  const bool df_includeChirality;  // presumably holds the includeChirality constructor argument (constructor body not shown here) — confirm
25  const bool df_topologicalTorsionCorrection;  // presumably holds the topologicalTorsionCorrection constructor argument — confirm
26 
27  public:
28  /**
29  \brief Construct a new AtomPairAtomInvGenerator object
30 
31  \param includeChirality toggles the inclusion of bits indicating R/S
32  chirality
33  \param topologicalTorsionCorrection when set, subtracts 2 from the invariants
34  returned; added so that TopologicalTorsionGenerator can use this
35  */
36  AtomPairAtomInvGenerator(bool includeChirality = false,
37  bool topologicalTorsionCorrection = false);
38 
39  std::vector<std::uint32_t> *getAtomInvariants(
40  const ROMol &mol) const override;  // gets the atom invariants for mol; NOTE(review): raw pointer return, ownership is not stated here — confirm
41 
42  std::string infoString() const override;  // returns information about this generator and its arguments
43  AtomPairAtomInvGenerator *clone() const override;  // NOTE(review): raw pointer return; presumably a caller-owned copy — confirm
44 };
45 
46 /*!
47  \brief class that holds atom-pair fingerprint specific arguments
48 
49  */
51  : public FingerprintArguments {  // body of AtomPairArguments; the line carrying the class name is missing from this listing
52  public:
53  bool df_use2D;  // presumably the use2D constructor argument documented below — confirm
54  unsigned int d_minDistance;  // presumably the minDistance constructor argument documented below — confirm
55  unsigned int d_maxDistance;  // presumably the maxDistance constructor argument documented below — confirm
56 
57  std::string infoString() const override;  // returns an information string about this argument set
58 
59  /*!
60  \brief construct a new AtomPairArguments object
61 
62  \param countSimulation if set, use count simulation while generating the
63  fingerprint
64  \param includeChirality if set, chirality will be used in the atom
65  invariants, this is ignored if atomInvariantsGenerator is present for
66  the \c FingerprintGenerator that uses this
67  \param use2D if set, the 2D (topological) distance matrix will be
68  used
69  \param minDistance minimum distance between atoms to be considered in a
70  pair, default is 1 bond
71  \param maxDistance maximum distance between atoms to be considered in a
72  pair, default is maxPathLen-1 bonds
73  \param countBounds boundaries for count simulation, corresponding bit
74  will be set if the count is higher than the number provided for that spot
75  \param fpSize size of the generated fingerprint, does not affect the sparse
76  versions
77 
78  */
79  AtomPairArguments(const bool countSimulation = true,
80  const bool includeChirality = false,
81  const bool use2D = true, const unsigned int minDistance = 1,
82  const unsigned int maxDistance = (maxPathLen - 1),
83  const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},  // NOTE(review): taken by const value rather than by const reference
84  const std::uint32_t fpSize = 2048);
85 };
86 
87 /*!
88  \brief class that holds atom-environment data needed for atom-pair fingerprint
89  generation
90 
91  */
92 template <typename OutputType>
94  : public AtomEnvironment<OutputType> {  // body of AtomPairAtomEnv; the line carrying the class name is missing from this listing
95  const unsigned int d_atomIdFirst;  // presumably the atomIdFirst constructor argument documented below — confirm
96  const unsigned int d_atomIdSecond;  // presumably the atomIdSecond constructor argument documented below — confirm
97  const unsigned int d_distance;  // presumably the distance constructor argument documented below — confirm
98 
99  public:
100  OutputType getBitId(  // calculates and returns the bit id to be set for this atom-environment
101  FingerprintArguments *arguments,
102  const std::vector<std::uint32_t> *atomInvariants,
103  const std::vector<std::uint32_t> *bondInvariants, // unused
104  AdditionalOutput *additionalOutput, // unused
105  const bool hashResults = false,
106  const std::uint64_t fpSize = 0 // unused
107  ) const override;
109  size_t bitId) const override;  // tail of the updateAdditionalOutput(AdditionalOutput *output, size_t bitId) declaration; its first line is missing from this listing
110 
111  /*!
112  \brief construct a new AtomPairAtomEnv object
113 
114  \param atomIdFirst id of the first atom of the atom-pair
115  \param atomIdSecond id of the second atom of the atom-pair
116  \param distance distance between the atoms
117  */
118  AtomPairAtomEnv(const unsigned int atomIdFirst,
119  const unsigned int atomIdSecond, const unsigned int distance);
120 };
121 
122 /*!
123  \brief class that generates atom-environments for atom-pair fingerprint
124 
125  */
126 template <typename OutputType>
128  : public AtomEnvironmentGenerator<OutputType> {  // body of the atom-pair environment generator; the line carrying the class name is missing from this listing
129  public:
130  std::vector<AtomEnvironment<OutputType> *> getEnvironments(  // generates and returns all atom-environments from a molecule
131  const ROMol &mol, FingerprintArguments *arguments,
132  const std::vector<std::uint32_t> *fromAtoms,
133  const std::vector<std::uint32_t> *ignoreAtoms, const int confId,
134  const AdditionalOutput *additionalOutput,
135  const std::vector<std::uint32_t> *atomInvariants,
136  const std::vector<std::uint32_t> *bondInvariants,
137  const bool hashResults = false) const override;  // NOTE(review): the result holds raw pointers; ownership is not stated here — confirm who deletes them
138 
139  std::string infoString() const override;  // returns information about this environment generator and its arguments, if any
140  OutputType getResultSize() const override;  // returns the size of the fingerprint based on arguments
141 };
142 
143 /*!
144  \brief helper function that generates a \c FingerprintGenerator that generates
145  atom-pair fingerprints
146  \tparam OutputType determines the size of the bitIds and the result, can be 32
147  or 64 bit unsigned integer
148  \param minDistance minimum distance between atoms to be considered in a pair,
149  default is 1 bond
150  \param maxDistance maximum distance between atoms to be considered in a pair,
151  default is maxPathLen-1 bonds
152  \param includeChirality if set, chirality will be used in the atom invariants,
153  this is ignored if atomInvariantsGenerator is provided
154  \param use2D if set, the 2D (topological) distance matrix will be used
155  \param atomInvariantsGenerator atom invariants to be used during fingerprint
156  generation
157  \param useCountSimulation if set, use count simulation while generating the
158  fingerprint
159  \param countBounds boundaries for count simulation, corresponding bit will be
160  set if the count is higher than the number provided for that spot
161  \param fpSize size of the generated fingerprint, does not affect the sparse
162  versions
163  \param ownsAtomInvGen if set atom invariants generator is destroyed with the
164  fingerprint generator
165 
166  \return FingerprintGenerator<OutputType>* that generates atom-pair
167  fingerprints
168 
169  This generator supports the following \c AdditionalOutput types:
170  - \c atomToBits : which bits each atom is involved in
171  - \c atomCounts : how many bits each atom sets
172  - \c bitInfoMap : map from bitId to (atomId1, atomId2) pairs
173 
174  */
175 template <typename OutputType>  // the line carrying the return type and the name getAtomPairGenerator is missing from this listing
178  const unsigned int minDistance = 1,
179  const unsigned int maxDistance = maxPathLen - 1,
180  const bool includeChirality = false, const bool use2D = true,
181  AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
182  const bool useCountSimulation = true, const std::uint32_t fpSize = 2048,
183  const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
184  const bool ownsAtomInvGen = false);
185 
186 } // namespace AtomPair
187 } // namespace RDKit
188 
189 #endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
class that holds atom-pair fingerprint specific arguments
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
AtomPairArguments(const bool countSimulation=true, const bool includeChirality=false, const bool use2D=true, const unsigned int minDistance=1, const unsigned int maxDistance=(maxPathLen - 1), const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, const std::uint32_t fpSize=2048)
construct a new AtomPairArguments object
class that holds atom-environment data needed for atom-pair fingerprint generation
void updateAdditionalOutput(AdditionalOutput *output, size_t bitId) const override
OutputType getBitId(FingerprintArguments *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, AdditionalOutput *additionalOutput, const bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
AtomPairAtomEnv(const unsigned int atomIdFirst, const unsigned int atomIdSecond, const unsigned int distance)
construct a new AtomPairAtomEnv object
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
AtomPairAtomInvGenerator * clone() const override
std::string infoString() const override
method that returns information about this \c AtomInvariantsGenerator and its arguments
AtomPairAtomInvGenerator(bool includeChirality=false, bool topologicalTorsionCorrection=false)
Construct a new AtomPairAtomInvGenerator object.
class that generates atom-environments for atom-pair fingerprint
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const bool hashResults=false) const override
generate and return all atom-environments from a molecule
std::string infoString() const override
method that returns information about this \c AtomEnvironmentGenerator and its arguments if any
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
class that generates same fingerprint style for different output formats
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:177
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getAtomPairGenerator(const unsigned int minDistance=1, const unsigned int maxDistance=maxPathLen - 1, const bool includeChirality=false, const bool use2D=true, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, const bool useCountSimulation=true, const std::uint32_t fpSize=2048, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, const bool ownsAtomInvGen=false)
helper function that generates a \c FingerprintGenerator that generates atom-pair fingerprints
const unsigned int maxPathLen
Std stuff.
Definition: Abbreviations.h:19