RDKit
Open-source cheminformatics and machine learning.
MorganFingerprints.h
Go to the documentation of this file.
1 //
2 //
3 // Copyright (c) 2009-2010, Novartis Institutes for BioMedical Research Inc.
4 // and other RDKit contributors
5 //
6 // All rights reserved.
7 //
8 // Redistribution and use in source and binary forms, with or without
9 // modification, are permitted provided that the following conditions are
10 // met:
11 //
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
14 // * Redistributions in binary form must reproduce the above
15 // copyright notice, this list of conditions and the following
16 // disclaimer in the documentation and/or other materials provided
17 // with the distribution.
18 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
19 // nor the names of its contributors may be used to endorse or promote
20 // products derived from this software without specific prior written
21 // permission.
22 //
23 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 //
35 // Created by Greg Landrum, July 2008
36 //
37 //
38 
39 /*! \file MorganFingerprints.h
40 
41 */
42 #include <RDGeneral/export.h>
43 #ifndef __RD_MORGANFPS_H__
44 #define __RD_MORGANFPS_H__
45 
46 #include <vector>
47 #include <map>
50 #include <cstdint>
52 
53 namespace RDKit {
54 class ROMol;
55 namespace MorganFingerprints {
56 typedef std::map<std::uint32_t,
57  std::vector<std::pair<std::uint32_t, std::uint32_t>>>
59 
60 const std::string morganFingerprintVersion = "1.0.0";
61 
62 //! returns the Morgan fingerprint for a molecule
63 /*!
64  These fingerprints are similar to the well-known ECFP or
65  FCFP fingerprints, depending on which invariants are used.
66 
67  The algorithm used is described in the paper
68  Rogers, D. & Hahn, M. Extended-Connectivity Fingerprints. JCIM 50:742-54
69  (2010)
70  https://doi.org/10.1021/ci100050t
71 
72  The original implementation was done using this paper:
73  D. Rogers, R.D. Brown, M. Hahn J. Biomol. Screen. 10:682-6 (2005)
74  and an unpublished technical report:
75  http://www.ics.uci.edu/~welling/teaching/ICS274Bspring06/David%20Rogers%20-%20ECFP%20Manuscript.doc
76 
77  \param mol: the molecule to be fingerprinted
78  \param radius: the number of iterations to grow the fingerprint
79  \param invariants : optional pointer to a set of atom invariants to
80  be used. By default ECFP-type invariants are used
81  (calculated by getConnectivityInvariants())
82  \param fromAtoms : if this is provided, only the atoms in the vector will be
83  used as centers in the fingerprint
84  \param useChirality : if set, additional information will be added to the
85  fingerprint
86  when chiral atoms are discovered. This will cause
87  \verbatim C[C@H](F)Cl,
88  C[C@@H](F)Cl, and CC(F)Cl \endverbatim to generate
89  different fingerprints.
90  \param useBondTypes : if set, bond types will be included as part of the hash
91  for
92  calculating bits
93  \param useCounts : if set, counts of the features will be used
94  \param onlyNonzeroInvariants : if set, bits will only be set from atoms that
95  have a nonzero invariant.
96  \param atomsSettingBits : if non-null, this will be used to return information
97  about the atoms that set each particular bit.
98  The keys of the map are bit ids, the values
99  are lists of (atomId, radius) pairs.
100  \param includeRedundantEnvironments : if set, the check for redundant atom
101  environments will not be done.
102 
103  \return a pointer to the fingerprint. The client is
104  responsible for calling delete on this.
105 
106 */
108  const ROMol &mol, unsigned int radius,
109  std::vector<boost::uint32_t> *invariants = nullptr,
110  const std::vector<boost::uint32_t> *fromAtoms = nullptr,
111  bool useChirality = false, bool useBondTypes = true, bool useCounts = true,
112  bool onlyNonzeroInvariants = false, BitInfoMap *atomsSettingBits = nullptr,
113  bool includeRedundantEnvironments = false);
114 
115 //! returns the hashed Morgan fingerprint for a molecule
116 /*!
117  These fingerprints are similar to the well-known ECFP or
118  FCFP fingerprints, depending on which invariants are used.
119 
120  The algorithm used is described in the paper
121  Rogers, D. & Hahn, M. Extended-Connectivity Fingerprints. JCIM 50:742-54
122  (2010)
123  https://doi.org/10.1021/ci100050t
124 
125  The original implementation was done using this paper:
126  D. Rogers, R.D. Brown, M. Hahn J. Biomol. Screen. 10:682-6 (2005)
127  and an unpublished technical report:
128  http://www.ics.uci.edu/~welling/teaching/ICS274Bspring06/David%20Rogers%20-%20ECFP%20Manuscript.doc
129 
130  \param mol: the molecule to be fingerprinted
131  \param radius: the number of iterations to grow the fingerprint
132  \param invariants : optional pointer to a set of atom invariants to
133  be used. By default ECFP-type invariants are used
134  (calculated by getConnectivityInvariants())
135  \param fromAtoms : if this is provided, only the atoms in the vector will be
136  used as centers in the fingerprint
137  \param useChirality : if set, additional information will be added to the
138  fingerprint
139  when chiral atoms are discovered. This will cause
140  \verbatim C[C@H](F)Cl,
141  C[C@@H](F)Cl, and CC(F)Cl \endverbatim to generate
142  different fingerprints.
143  \param useBondTypes : if set, bond types will be included as part of the hash
144  for
145  calculating bits
146  \param onlyNonzeroInvariants : if set, bits will only be set from atoms that
147  have a nonzero invariant.
148  \param atomsSettingBits : if non-null, this will be used to return information
149  about the atoms that set each particular bit.
150  The keys of the map are bit ids, the values
151  are lists of (atomId, radius) pairs.
152  \param includeRedundantEnvironments : if set, the check for redundant atom
153  environments will not be done.
154 
155  \return a pointer to the fingerprint. The client is
156  responsible for calling delete on this.
157 
158 */
160  const ROMol &mol, unsigned int radius, unsigned int nBits = 2048,
161  std::vector<boost::uint32_t> *invariants = nullptr,
162  const std::vector<boost::uint32_t> *fromAtoms = nullptr,
163  bool useChirality = false, bool useBondTypes = true,
164  bool onlyNonzeroInvariants = false, BitInfoMap *atomsSettingBits = nullptr,
165  bool includeRedundantEnvironments = false);
166 
167 //! returns the Morgan fingerprint for a molecule as a bit vector
168 /*!
169  see documentation for getFingerprint() for theory/references
170 
171  \param mol: the molecule to be fingerprinted
172  \param radius: the number of iterations to grow the fingerprint
173  \param nBits: the number of bits in the final fingerprint
174  \param invariants : optional pointer to a set of atom invariants to
175  be used. By default ECFP-type invariants are used
176  (calculated by getConnectivityInvariants())
177  \param fromAtoms : if this is provided, only the atoms in the vector will be
178  used as centers in the fingerprint
179  \param useChirality : if set, additional information will be added to the
180  fingerprint
181  when chiral atoms are discovered. This will cause
182  \verbatim C[C@H](F)Cl,
183  C[C@@H](F)Cl, and CC(F)Cl \endverbatim to generate
184  different fingerprints.
185  \param useBondTypes : if set, bond types will be included as part of the hash
186  for
187  calculating bits
188  \param onlyNonzeroInvariants : if set, bits will only be set from atoms that
189  have a nonzero invariant.
190  \param atomsSettingBits : if non-null, this will be used to return information
191  about the atoms that set each particular bit.
192  The keys of the map are bit ids, the values
193  are lists of (atomId, radius) pairs.
194  \param includeRedundantEnvironments : if set, the check for redundant atom
195  environments will not be done.
196 
197  \return a pointer to the fingerprint. The client is
198  responsible for calling delete on this.
199 
200 */
202  const ROMol &mol, unsigned int radius, unsigned int nBits,
203  std::vector<std::uint32_t> *invariants = nullptr,
204  const std::vector<std::uint32_t> *fromAtoms = nullptr,
205  bool useChirality = false, bool useBondTypes = true,
206  bool onlyNonzeroInvariants = false, BitInfoMap *atomsSettingBits = nullptr,
207  bool includeRedundantEnvironments = false);
208 
209 } // end of namespace MorganFingerprints
210 } // namespace RDKit
211 
212 #endif
a class for bit vectors that are densely occupied
a class for efficiently storing sparse vectors of ints
Definition: SparseIntVect.h:28
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:177
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::uint32_t > * getFingerprint(const ROMol &mol, unsigned int radius, std::vector< boost::uint32_t > *invariants=nullptr, const std::vector< boost::uint32_t > *fromAtoms=nullptr, bool useChirality=false, bool useBondTypes=true, bool useCounts=true, bool onlyNonzeroInvariants=false, BitInfoMap *atomsSettingBits=nullptr, bool includeRedundantEnvironments=false)
returns the Morgan fingerprint for a molecule
std::map< std::uint32_t, std::vector< std::pair< std::uint32_t, std::uint32_t > > > BitInfoMap
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * getFingerprintAsBitVect(const ROMol &mol, unsigned int radius, unsigned int nBits, std::vector< std::uint32_t > *invariants=nullptr, const std::vector< std::uint32_t > *fromAtoms=nullptr, bool useChirality=false, bool useBondTypes=true, bool onlyNonzeroInvariants=false, BitInfoMap *atomsSettingBits=nullptr, bool includeRedundantEnvironments=false)
returns the Morgan fingerprint for a molecule as a bit vector
const std::string morganFingerprintVersion
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::uint32_t > * getHashedFingerprint(const ROMol &mol, unsigned int radius, unsigned int nBits=2048, std::vector< boost::uint32_t > *invariants=nullptr, const std::vector< boost::uint32_t > *fromAtoms=nullptr, bool useChirality=false, bool useBondTypes=true, bool onlyNonzeroInvariants=false, BitInfoMap *atomsSettingBits=nullptr, bool includeRedundantEnvironments=false)
returns the Morgan fingerprint for a molecule
Std stuff.
Definition: Abbreviations.h:19