RDKit
Open-source cheminformatics and machine learning.
SubstructMatch.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2020 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_SUBSTRUCTMATCH_H
12 #define RD_SUBSTRUCTMATCH_H
13 
14 // std bits
15 #include <vector>
16 
17 #include <unordered_set>
18 #include <functional>
19 #include <unordered_map>
20 #include <cstdint>
21 #include <string>
22 
23 #include <boost/dynamic_bitset.hpp>
24 #if BOOST_VERSION >= 107100
25 #define RDK_INTERNAL_BITSET_HAS_HASH
26 #endif
27 
28 #include <GraphMol/StereoGroup.h>
29 
30 namespace RDKit {
31 class ROMol;
32 class Atom;
33 class Bond;
34 class ResonanceMolSupplier;
35 class MolBundle;
36 
37 //! \brief used to return matches from substructure searching.
38 //! The format is (queryAtomIdx, molAtomIdx)
39 typedef std::vector<std::pair<int, int>> MatchVectType;
40 
42  bool useChirality = false; //!< Use chirality in determining whether or not
43  //!< atoms/bonds match
44  bool useEnhancedStereo = false; //!< Use enhanced stereochemistry in
45  //!< determining whether atoms/bonds match
46  bool aromaticMatchesConjugated = false; //!< Aromatic and conjugated bonds
47  //!< match each other
48  bool useQueryQueryMatches = false; //!< Consider query-query matches, not
49  //!< just simple matches
50  bool useGenericMatchers = false; //!< Looks for generic atoms in the query
51  //!< and uses them as part of the matching
52  bool recursionPossible = true; //!< Allow recursive queries
53  bool uniquify = true; //!< uniquify (by atom index) match results
54  unsigned int maxMatches = 1000; //!< maximum number of matches to return
55  int numThreads = 1; //!< number of threads to use when multi-threading
56  //!< is possible. 0 selects the number of
57  //!< concurrent threads supported by the hardware;
58  //!< negative values are added to the number of
59  //!< concurrent threads supported by the hardware
60  std::function<bool(const ROMol &mol,
61  const std::vector<unsigned int> &match)>
62  extraFinalCheck; //!< a function to be called at the end to validate a
63  //!< match
64 
66 };
67 
69  SubstructMatchParameters &params, const std::string &json);
71  const SubstructMatchParameters &params);
72 
73 //! Find substructure matches for a query in a molecule
74 /*!
75  \param mol The ROMol to be searched
76  \param query The query ROMol
77  \param matchParams Parameters controlling the matching
78 
79  \return The matches, if any
80 
81 */
82 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
83  const ROMol &mol, const ROMol &query,
85 
86 //! Find all substructure matches for a query in a ResonanceMolSupplier object
87 /*!
88  \param resMolSuppl The ResonanceMolSupplier object to be searched
89  \param query The query ROMol
90  \param matchParams Parameters controlling the matching
91 
92  \return The matches, if any
93 
94 */
95 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
96  ResonanceMolSupplier &resMolSuppl, const ROMol &query,
98 
99 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
100  const MolBundle &bundle, const ROMol &query,
102 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
103  const ROMol &mol, const MolBundle &query,
105 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
106  const MolBundle &bundle, const MolBundle &query,
108 
109 //! Find a substructure match for a query
110 /*!
111  \param mol The object to be searched
112  \param query The query
113  \param matchVect Used to return the match
114  (pre-existing contents will be deleted)
115  \param recursionPossible flags whether or not recursive matches are allowed
116  \param useChirality use atomic CIP codes as part of the comparison
117  \param useQueryQueryMatches if set, the contents of atom and bond queries
118  will be used as part of the matching
119 
120  \return whether or not a match was found
121 
122 */
123 template <typename T1, typename T2>
124 bool SubstructMatch(T1 &mol, const T2 &query, MatchVectType &matchVect,
125  bool recursionPossible = true, bool useChirality = false,
126  bool useQueryQueryMatches = false) {
128  params.recursionPossible = recursionPossible;
129  params.useChirality = useChirality;
130  params.useQueryQueryMatches = useQueryQueryMatches;
131  params.maxMatches = 1;
132  std::vector<MatchVectType> matchVects = SubstructMatch(mol, query, params);
133  if (matchVects.size()) {
134  matchVect = matchVects.front();
135  } else {
136  matchVect.clear();
137  }
138  return matchVect.size() != 0;
139 };
140 
141 //! Find all substructure matches for a query
142 /*!
143  \param mol The object to be searched
144  \param query The query
145  \param matchVect Used to return the matches
146  (pre-existing contents will be deleted)
147  \param uniquify Toggles uniquification (by atom index) of the results
148  \param recursionPossible flags whether or not recursive matches are allowed
149  \param useChirality use atomic CIP codes as part of the comparison
150  \param useQueryQueryMatches if set, the contents of atom and bond queries
151  will be used as part of the matching
152  \param maxMatches The maximum number of matches that will be returned.
153  In high-symmetry cases with medium-sized molecules, it is
154  very easy to end up with a combinatorial explosion in the
155  number of possible matches. This argument prevents that
156  from having unintended consequences
159 
160  \return the number of matches found
161 
162 */
163 template <typename T1, typename T2>
164 unsigned int SubstructMatch(T1 &mol, const T2 &query,
165  std::vector<MatchVectType> &matchVect,
166  bool uniquify = true, bool recursionPossible = true,
167  bool useChirality = false,
168  bool useQueryQueryMatches = false,
169  unsigned int maxMatches = 1000,
170  int numThreads = 1) {
172  params.uniquify = uniquify;
173  params.recursionPossible = recursionPossible;
174  params.useChirality = useChirality;
175  params.useQueryQueryMatches = useQueryQueryMatches;
176  params.maxMatches = maxMatches;
177  params.numThreads = numThreads;
178  matchVect = SubstructMatch(mol, query, params);
179  return static_cast<unsigned int>(matchVect.size());
180 };
181 
182 // ----------------------------------------------
183 //
184 // find one match in ResonanceMolSupplier object
185 //
186 template <>
187 inline bool SubstructMatch(ResonanceMolSupplier &resMolSupplier,
188  const ROMol &query, MatchVectType &matchVect,
189  bool recursionPossible, bool useChirality,
190  bool useQueryQueryMatches) {
192  params.recursionPossible = recursionPossible;
193  params.useChirality = useChirality;
194  params.useQueryQueryMatches = useQueryQueryMatches;
195  params.maxMatches = 1;
196  std::vector<MatchVectType> matchVects =
197  SubstructMatch(resMolSupplier, query, params);
198  if (matchVects.size()) {
199  matchVect = matchVects.front();
200  } else {
201  matchVect.clear();
202  }
203  return matchVect.size() != 0;
204 }
205 
206 template <>
207 inline unsigned int SubstructMatch(ResonanceMolSupplier &resMolSupplier,
208  const ROMol &query,
209  std::vector<MatchVectType> &matchVect,
210  bool uniquify, bool recursionPossible,
211  bool useChirality, bool useQueryQueryMatches,
212  unsigned int maxMatches, int numThreads) {
214  params.uniquify = uniquify;
215  params.recursionPossible = recursionPossible;
216  params.useChirality = useChirality;
217  params.useQueryQueryMatches = useQueryQueryMatches;
218  params.maxMatches = maxMatches;
219  params.numThreads = numThreads;
220  matchVect = SubstructMatch(resMolSupplier, query, params);
221  return static_cast<unsigned int>(matchVect.size());
222 };
223 
224 //! Class used as a final step to confirm whether or not a given atom->atom
225 //! mapping is a valid substructure match.
227  public:
228  MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol,
229  const SubstructMatchParameters &ps);
230 
231  bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]);
232 
233  private:
234  const ROMol &d_query;
235  const ROMol &d_mol;
236  const SubstructMatchParameters &d_params;
237  std::unordered_map<unsigned int, StereoGroup const *> d_molStereoGroups;
238 #ifdef RDK_INTERNAL_BITSET_HAS_HASH
239  // Boost 1.71 added support for std::hash with dynamic_bitset.
240  using HashedStorageType = boost::dynamic_bitset<>;
241 #else
242  // otherwise we use a less elegant solution
243  using HashedStorageType = std::string;
244 #endif
245  std::unordered_set<HashedStorageType> matchesSeen;
246 };
247 
248 } // namespace RDKit
249 
250 #endif
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
MolBundle contains a collection of related ROMols.
Definition: MolBundle.h:39
MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol, const SubstructMatchParameters &ps)
bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[])
#define RDKIT_SUBSTRUCTMATCH_EXPORT
Definition: export.h:489
Std stuff.
Definition: Abbreviations.h:19
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector< MatchVectType > SubstructMatch(const ROMol &mol, const ROMol &query, const SubstructMatchParameters &params=SubstructMatchParameters())
Find a substructure match for a query in a molecule.
RDKIT_SUBSTRUCTMATCH_EXPORT void updateSubstructMatchParamsFromJSON(SubstructMatchParameters &params, const std::string &json)
RDKIT_SUBSTRUCTMATCH_EXPORT std::string substructMatchParamsToJSON(const SubstructMatchParameters &params)
unsigned int maxMatches
maximum number of matches to return
bool uniquify
uniquify (by atom index) match results
bool recursionPossible
Allow recursive queries.
std::function< bool(const ROMol &mol, const std::vector< unsigned int > &match)> extraFinalCheck