RDKit
Open-source cheminformatics and machine learning.
FilterCatalog.h
Go to the documentation of this file.
1 // Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 
32 #include <RDGeneral/export.h>
33 #ifndef _RD_FILTER_CATALOG_PARAMS_
34 #define _RD_FILTER_CATALOG_PARAMS_
35 
36 #include <Catalogs/Catalog.h>
37 #include <Catalogs/CatalogParams.h>
38 #include "FilterCatalogEntry.h"
39 
40 namespace RDKit {
41 class FilterCatalog;
43  : public RDCatalog::CatalogParams {
44  public:
46  PAINS_A = (1u << 1),
47  PAINS_B = (1u << 2),
48  PAINS_C = (1u << 3),
49  PAINS = PAINS_A | PAINS_B | PAINS_C,
50 
51  BRENK = (1u << 4),
52  NIH = (1u << 5),
53  ZINC = (1u << 6),
54 
55  CHEMBL_Glaxo = (1u << 7),
56  CHEMBL_Dundee = (1u << 8),
57  CHEMBL_BMS = (1u << 9),
58  CHEMBL_SureChEMBL = (1u << 10),
59  CHEMBL_MLSMR = (1u << 11),
60  CHEMBL_Inpharmatica = (1u << 12),
61  CHEMBL_LINT = (1u << 13),
62  CHEMBL = CHEMBL_Glaxo | CHEMBL_Dundee | CHEMBL_BMS | CHEMBL_SureChEMBL |
63  CHEMBL_MLSMR | CHEMBL_Inpharmatica | CHEMBL_LINT,
64 
65  ALL = PAINS | BRENK | NIH | ZINC | CHEMBL
66  };
67 
68  FilterCatalogParams() : RDCatalog::CatalogParams() {
69  setTypeStr("Filter Catalog Parameters");
70  }
71 
72  FilterCatalogParams(FilterCatalogs catalogs) : RDCatalog::CatalogParams() {
73  setTypeStr("Filter Catalog Parameters");
74  addCatalog(catalogs);
75  }
76 
78  : RDCatalog::CatalogParams(other), d_catalogs(other.d_catalogs) {}
79 
80  ~FilterCatalogParams() override {}
81 
82  //------------------------------------
83  //! Adds an existing FilterCatalog specification to be used in the
84  /// FilterCatalog
85  //
86  /*!
87  Specifies an existing filter catalog to be used.
88 
89  \param catalogs One of the enumerated known FilterCatalogs
90  */
91  virtual bool addCatalog(FilterCatalogs catalogs);
92 
93  //------------------------------------
94  //! Returns the existing list of FilterCatalogs to be used.
95  const std::vector<FilterCatalogs> &getCatalogs() const { return d_catalogs; }
96  //! Fill a catalog with the appropriate entries
97  virtual void fillCatalog(FilterCatalog &catalog) const;
98 
99  //! serializes (pickles) to a stream
100  void toStream(std::ostream &ss) const override;
101  //! returns a string with a serialized (pickled) representation
102  std::string Serialize() const override;
103  //! initializes from a stream pickle
104  void initFromStream(std::istream &ss) override;
105  //! initializes from a string pickle
106  void initFromString(const std::string &text) override;
107 
108  private:
109  std::vector<FilterCatalogs> d_catalogs;
110 
111 #ifdef RDK_USE_BOOST_SERIALIZATION
112  friend class boost::serialization::access;
113  template <class Archive>
114  void serialize(Archive &ar, const unsigned int version) {
115  RDUNUSED_PARAM(version);
116  ar &d_catalogs;
117  }
118 #endif
119 };
120 
123  public:
124  // syntactic sugar for getMatch(es) return values.
125  typedef boost::shared_ptr<FilterCatalogEntry> SENTRY;
126 
127  // If boost::python can support shared_ptr of const objects
128  // we can enable support for this feature
129  typedef boost::shared_ptr<const entryType_t> CONST_SENTRY;
130 
131  FilterCatalog() : FCatalog(), d_entries() {}
132 
134  : FCatalog(), d_entries() {
135  paramType_t temp_params(catalogs);
136  setCatalogParams(&temp_params);
137  }
138 
139  FilterCatalog(const FilterCatalogParams &params) : FCatalog(), d_entries() {
140  setCatalogParams(&params);
141  }
142 
144  : FCatalog(rhs), d_entries(rhs.d_entries) {}
145 
146  FilterCatalog(const std::string &binStr);
147 
148  ~FilterCatalog() override;
149 
150  std::string Serialize() const override;
151 
152  // Adds a new FilterCatalogEntry to the catalog
153  /*!
154  Adds a new FilterCatalogEntry to the catalog. The catalog
155  owns the entry.
156 
157  \param entry The FilterCatalogEntry to add.
158  \param updateFPLength unused in the FilterCatalog object.
159  */
160 
161  unsigned int addEntry(FilterCatalogEntry *entry,
162  bool updateFPLength = true) override;
163 
164  // Adds a new FilterCatalogEntry to the catalog
165  /*!
166  Adds a new FilterCatalogEntry to the catalog. The catalog
167  owns the entry.
168 
169  \param entry The shared_ptr of the FilterCatalogEntry to add.
170  \param updateFPLength unused in the FilterCatalog object.
171  */
172 
173  virtual unsigned int addEntry(SENTRY entry, bool updateFPLength = true);
174 
175  // Removes a FilterCatalogEntry from the catalog by index
176  /*!
177  Removes a FilterCatalogEntry from the catalog.
178 
179  \param idx The FilterCatalogEntry index for the entry to remove.
180  n.b. removing an entry may change the indices of other entries.
181  To safely remove entries, remove entries with the highest idx
182  first.
183  */
184  bool removeEntry(unsigned int idx);
186 
187  //------------------------------------
188  //! returns a particular FilterCatalogEntry in the Catalog
189  //! required by Catalog.h API
190  const FilterCatalogEntry *getEntryWithIdx(unsigned int idx) const override;
191 
192  //------------------------------------
193  //! returns a particular FilterCatalogEntry in the Catalog
194  //! memory safe version of getEntryWithIdx
195  CONST_SENTRY getEntry(unsigned int idx) const;
196 
197  //------------------------------------
198  //! returns the idx of the given entry, UINT_MAX if not found.
199 
200  unsigned int getIdxForEntry(const FilterCatalogEntry *entry) const;
201  unsigned int getIdxForEntry(CONST_SENTRY entry) const;
202 
203  //------------------------------------
204  //! returns the number of entries in the catalog
205  unsigned int getNumEntries() const override {
206  return static_cast<unsigned int>(d_entries.size());
207  }
208 
209  //------------------------------------
210  //! Reset the current catalog to match the specified FilterCatalogParams
211  /*!
212  \param params The new FilterCatalogParams specifying the new state of the
213  catalog
214  */
215  void setCatalogParams(const FilterCatalogParams *params) override;
216 
217  //------------------------------------
218  //! Returns true if the molecule matches any entry in the catalog
219  /*!
220  \param mol ROMol to match against the catalog
221  */
222  bool hasMatch(const ROMol &mol) const;
223 
224  //------------------------------------
225  //! Returns the first match against the catalog
226  /*!
227  \param mol ROMol to match against the catalog
228  */
229  CONST_SENTRY getFirstMatch(const ROMol &mol) const;
230 
231  //-------------------------------------------
232  //! Returns all entry matches to the molecule
233  /*!
234  \param mol ROMol to match against the catalog
235  */
236  const std::vector<CONST_SENTRY> getMatches(const ROMol &mol) const;
237 
238  //--------------------------------------------
239  //! Returns all FilterMatches for the molecule
240  /*!
241  \param mol ROMol to match against the catalog
242  */
243  const std::vector<FilterMatch> getFilterMatches(const ROMol &mol) const;
244 
245  private:
246  void Clear();
247  std::vector<SENTRY> d_entries;
248 };
249 
251 
252 //! Run a filter catalog on a set of smiles strings
253 /*!
254  \param smiles vector of smiles strings to analyze
255  \param numThreads specify the number of threads to use or specify 0 to use all
256  processors [default 1]
257  \returns a vector of vectors. For each input smiles string, returns
258  a vector of boost::shared_ptr<const FilterCatalogEntry> objects.
259  If a molecule matches no filters, the vector will be empty.
260  If a smiles can't be parsed, a 'no valid RDKit molecule'
261  catalog entry is returned.
262 
263 */
265 std::vector<std::vector<boost::shared_ptr<const FilterCatalogEntry>>>
266 RunFilterCatalog(const FilterCatalog &filterCatalog,
267  const std::vector<std::string> &smiles, int numThreads = 1);
268 } // namespace RDKit
269 
270 #endif
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:196
abstract base class for the container used to create a catalog
Definition: CatalogParams.h:18
abstract base class for a catalog object
Definition: Catalog.h:40
paramType paramType_t
Definition: Catalog.h:43
void toStream(std::ostream &ss) const override
serializes (pickles) to a stream
const std::vector< FilterCatalogs > & getCatalogs() const
Returns the existing list of FilterCatalogs to be used.
Definition: FilterCatalog.h:95
FilterCatalogParams(const FilterCatalogParams &other)
Definition: FilterCatalog.h:77
void initFromStream(std::istream &ss) override
initializes from a stream pickle
std::string Serialize() const override
returns a string with a serialized (pickled) representation
virtual void fillCatalog(FilterCatalog &catalog) const
Fill a catalog with the appropriate entries.
void initFromString(const std::string &text) override
initializes from a string pickle
virtual bool addCatalog(FilterCatalogs catalogs)
FilterCatalogParams(FilterCatalogs catalogs)
Definition: FilterCatalog.h:72
FilterCatalog(FilterCatalogParams::FilterCatalogs catalogs)
unsigned int getNumEntries() const override
returns the number of entries in the catalog
bool removeEntry(CONST_SENTRY entry)
unsigned int addEntry(FilterCatalogEntry *entry, bool updateFPLength=true) override
boost::shared_ptr< const entryType_t > CONST_SENTRY
~FilterCatalog() override
CONST_SENTRY getEntry(unsigned int idx) const
const FilterCatalogEntry * getEntryWithIdx(unsigned int idx) const override
FilterCatalog(const FilterCatalogParams &params)
unsigned int getIdxForEntry(const FilterCatalogEntry *entry) const
returns the idx of the given entry, UINT_MAX if not found.
bool removeEntry(unsigned int idx)
unsigned int getIdxForEntry(CONST_SENTRY entry) const
const std::vector< FilterMatch > getFilterMatches(const ROMol &mol) const
Returns all FilterMatches for the molecule.
FilterCatalog(const FilterCatalog &rhs)
bool hasMatch(const ROMol &mol) const
Returns true if the molecule matches any entry in the catalog.
const std::vector< CONST_SENTRY > getMatches(const ROMol &mol) const
Returns all entry matches to the molecule.
void setCatalogParams(const FilterCatalogParams *params) override
Reset the current catalog to match the specified FilterCatalogParams.
virtual unsigned int addEntry(SENTRY entry, bool updateFPLength=true)
FilterCatalog(const std::string &binStr)
std::string Serialize() const override
return a serialized form of the Catalog as an std::string
boost::shared_ptr< FilterCatalogEntry > SENTRY
CONST_SENTRY getFirstMatch(const ROMol &mol) const
Returns the first match against the catalog.
#define RDKIT_FILTERCATALOG_EXPORT
Definition: export.h:169
Std stuff.
Definition: Abbreviations.h:19
RDCatalog::Catalog< FilterCatalogEntry, FilterCatalogParams > FCatalog
RDKIT_FILTERCATALOG_EXPORT std::vector< std::vector< boost::shared_ptr< const FilterCatalogEntry > > > RunFilterCatalog(const FilterCatalog &filterCatalog, const std::vector< std::string > &smiles, int numThreads=1)
Run a filter catalog on a set of smiles strings.
RDKIT_FILTERCATALOG_EXPORT bool FilterCatalogCanSerialize()