RDKit
Open-source cheminformatics and machine learning.
Catalog.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2003-2006 Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RD_CATALOG_H
13 #define RD_CATALOG_H
14 
15 // Boost graph stuff
17 #include <boost/graph/graph_traits.hpp>
18 #include <boost/graph/adjacency_list.hpp>
19 #include <boost/version.hpp>
20 #if BOOST_VERSION >= 104000
21 #include <boost/property_map/property_map.hpp>
22 #else
23 #include <boost/property_map.hpp>
24 #endif
26 
27 // for some typedefs
28 #include <RDGeneral/types.h>
29 #include <RDGeneral/StreamOps.h>
30 
31 namespace RDCatalog {
32 const int versionMajor = 1;
33 const int versionMinor = 0;
34 const int versionPatch = 0;
35 const int endianId = 0xDEADBEEF;
36 
37 //-----------------------------------------------------------------------------
38 //! abstract base class for a catalog object
39 template <class entryType, class paramType>
40 class Catalog {
41  public:
42  typedef entryType entryType_t;
43  typedef paramType paramType_t;
44 
45  //------------------------------------
46  Catalog() : dp_cParams(nullptr) {}
47 
48  //------------------------------------
49  virtual ~Catalog() { delete dp_cParams; }
50 
51  //------------------------------------
52  //! return a serialized form of the Catalog as an std::string
53  virtual std::string Serialize() const = 0;
54 
55  //------------------------------------
56  //! adds an entry to the catalog
57  /*!
58 
59  \param entry the entry to be added
60  \param updateFPLength (optional) if this is true, our internal
61  fingerprint length will also be updated.
62 
63  */
64  virtual unsigned int addEntry(entryType *entry,
65  bool updateFPLength = true) = 0;
66 
67  //------------------------------------
68  //! returns a particular entry in the Catalog
69  virtual const entryType *getEntryWithIdx(unsigned int idx) const = 0;
70 
71  //------------------------------------
72  //! returns the number of entries
73  virtual unsigned int getNumEntries() const = 0;
74 
75  //------------------------------------
76  //! returns the length of our fingerprint
77  unsigned int getFPLength() const { return d_fpLength; }
78 
79  //------------------------------------
80  //! sets our fingerprint length
81  void setFPLength(unsigned int val) { d_fpLength = val; }
82 
83  //------------------------------------
84  //! sets our parameters by copying the \c params argument
85  virtual void setCatalogParams(const paramType *params) {
86  PRECONDITION(params, "bad parameter object");
87  // if we already have a parameter object throw an exception
89  "A parameter object already exists on the catalog");
90  /*
91  if (dp_cParams) {
92  // we already have parameter object on the catalog
93  // can't overwrite it
94  PRECONDITION(0, "A parameter object already exist on the catalog");
95  }*/
96  dp_cParams = new paramType(*params);
97  }
98 
99  //------------------------------------
100  //! returns a pointer to our parameters
101  const paramType *getCatalogParams() const { return dp_cParams; }
102 
103  protected:
104  // this is the ID that will be assigned to the next entry
105  // added to the catalog - need not be same as the number of entries
106  // in the catalog and does not correspond with the
107  // id of the entry in the catalog.
108  // this is more along the lines of bitId
109  unsigned int d_fpLength{0}; //!< the length of our fingerprint
110  paramType *dp_cParams; //!< our params object
111 };
112 
113 //-----------------------------------------------------------------------------
114 //! A Catalog with a hierarchical structure
115 /*!
116 
117  The entries of a HierarchCatalog are arranged in a directed graph
118 
119  <b>The difference between <i>Indices</i> and <i>Bit Ids</i></b>
120 
121  A HierarchCatalog may contain more entries than the user is actually
122  interested in. For example a HierarchCatalog constructed to contain
123  orders 5 through 8 may well contain information about orders 1-5,
124  in order to facilitate some search optimizations.
125 
126  - <i>Bit Ids</i> refer to the "interesting" bits.
127  So, in the above example, Bit Id \c 0 will be the first entry
128  with order 5.
129  - <i>Indices</i> refer to the underlying structure of the catalog.
130  So, in the above example, the entry with index \c 0 will be
131  the first entry with order 1.
132 
133 */
134 template <class entryType, class paramType, class orderType>
135 class HierarchCatalog : public Catalog<entryType, paramType> {
136  // the entries in the catalog can be traversed using the edges
137  // in a desired order
138  public:
139  //! used by the BGL to set up the node properties in our graph
140  struct vertex_entry_t {
141  enum { num = 1003 };
142  typedef boost::vertex_property_tag kind;
143  };
144  typedef boost::property<vertex_entry_t, entryType *> EntryProperty;
145 
146  //! the type of the graph itself:
147  typedef boost::adjacency_list<
148  boost::vecS,
149  boost::vecS, // FIX: should be using setS for edges so that parallel
150  // edges are never added (page 225 BGL book)
151  // but that seems result in compile errors
152  boost::bidirectionalS, EntryProperty>
154 
155  typedef boost::graph_traits<CatalogGraph> CAT_GRAPH_TRAITS;
156  typedef typename CAT_GRAPH_TRAITS::vertex_iterator VER_ITER;
157  typedef std::pair<VER_ITER, VER_ITER> ENT_ITER_PAIR;
158  typedef typename CAT_GRAPH_TRAITS::adjacency_iterator DOWN_ENT_ITER;
159  typedef std::pair<DOWN_ENT_ITER, DOWN_ENT_ITER> DOWN_ENT_ITER_PAIR;
160 
161  //------------------------------------
163 
164  //------------------------------------
165  //! Construct by making a copy of the input \c params object
166  HierarchCatalog(const paramType *params) : Catalog<entryType, paramType>() {
167  this->setCatalogParams(params);
168  }
169 
170  //------------------------------------
171  //! Construct from a \c pickle (a serialized form of the HierarchCatalog)
172  HierarchCatalog(const std::string &pickle) { this->initFromString(pickle); }
173 
174  //------------------------------------
175  ~HierarchCatalog() override { destroy(); }
176 
177  //------------------------------------
178  //! serializes this object to a stream
179  void toStream(std::ostream &ss) const {
180  PRECONDITION(this->getCatalogParams(), "NULL parameter object");
181 
182  // the i/o header:
187 
188  // information about the catalog itself:
189  int tmpUInt;
190  tmpUInt = this->getFPLength();
191  RDKit::streamWrite(ss, tmpUInt);
192  tmpUInt = this->getNumEntries();
193  RDKit::streamWrite(ss, tmpUInt);
194 
195  // std::cout << ">>>>-------------------------------" << std::endl;
196  // std::cout << "\tlength: " << getFPLength() << " " << getNumEntries() <<
197  // std::endl;
198 
199  // add the params object:
200  this->getCatalogParams()->toStream(ss);
201  // std::cout << "\tparams: " << getCatalogParams()->getLowerFragLength();
202  // std::cout << " " << getCatalogParams()->getUpperFragLength();
203  // std::cout << " " << getCatalogParams()->getNumFuncGroups();
204  // std::cout << std::endl;
205 
206  // write the entries in order:
207  for (unsigned int i = 0; i < getNumEntries(); i++) {
208  this->getEntryWithIdx(i)->toStream(ss);
209  }
210 
211  // finally the adjacency list:
212  for (unsigned int i = 0; i < getNumEntries(); i++) {
213  RDKit::INT_VECT children = this->getDownEntryList(i);
214  tmpUInt = static_cast<unsigned int>(children.size());
215  RDKit::streamWrite(ss, tmpUInt);
216  for (RDKit::INT_VECT::const_iterator ivci = children.begin();
217  ivci != children.end(); ivci++) {
218  RDKit::streamWrite(ss, *ivci);
219  }
220  }
221  }
222 
223  //------------------------------------
224  //! serializes this object and returns the resulting \c pickle
225  std::string Serialize() const override {
226  std::stringstream ss(std::ios_base::binary | std::ios_base::out |
227  std::ios_base::in);
228  this->toStream(ss);
229  return ss.str();
230  }
231 
232  //------------------------------------
233  //! fills the contents of this object from a stream containing a \c pickle
234  void initFromStream(std::istream &ss) {
235  int tmpInt;
236  // FIX: at the moment we ignore the header info:
237  RDKit::streamRead(ss, tmpInt);
238  RDKit::streamRead(ss, tmpInt);
239  RDKit::streamRead(ss, tmpInt);
240  RDKit::streamRead(ss, tmpInt);
241 
242  unsigned int tmpUInt;
243  RDKit::streamRead(ss, tmpUInt); // fp length
244  this->setFPLength(tmpUInt);
245 
246  unsigned int numEntries;
247  RDKit::streamRead(ss, numEntries);
248  // std::cout << "<<<-------------------------------" << std::endl;
249  // std::cout << "\tlength: " << getFPLength() << " " << numEntries <<
250  // std::endl;
251 
252  // grab the params:
253  paramType *params = new paramType();
254  params->initFromStream(ss);
255  this->setCatalogParams(params);
256  delete params;
257 
258  // std::cout << "\tparams: " << getCatalogParams()->getLowerFragLength();
259  // std::cout << " " << getCatalogParams()->getUpperFragLength();
260  // std::cout << " " << getCatalogParams()->getNumFuncGroups();
261  // std::cout << std::endl;
262 
263  // now all of the entries:
264  for (unsigned int i = 0; i < numEntries; i++) {
265  entryType *entry = new entryType();
266  entry->initFromStream(ss);
267  this->addEntry(entry, false);
268  }
269 
270  // and, finally, the adjacency list:
271  for (unsigned int i = 0; i < numEntries; i++) {
272  unsigned int nNeighbors;
273  RDKit::streamRead(ss, nNeighbors);
274  for (unsigned int j = 0; j < nNeighbors; j++) {
275  RDKit::streamRead(ss, tmpInt);
276  this->addEdge(i, tmpInt);
277  }
278  }
279  }
280 
281  //------------------------------------
282  unsigned int getNumEntries() const override {
283  return static_cast<unsigned int>(boost::num_vertices(d_graph));
284  }
285 
286  //------------------------------------
287  //! fills the contents of this object from a string containing a \c pickle
288  void initFromString(const std::string &text) {
289  std::stringstream ss(std::ios_base::binary | std::ios_base::out |
290  std::ios_base::in);
291  // initialize the stream:
292  ss.write(text.c_str(), text.length());
293  // now start reading out values:
294  this->initFromStream(ss);
295  }
296 
297  //------------------------------------
298  //! add a new entry to the catalog
299  /*!
300 
301  \param entry the entry to be added
302  \param updateFPLength (optional) if this is true, our internal
303  fingerprint length will also be updated.
304 
305  */
306  unsigned int addEntry(entryType *entry, bool updateFPLength = true) override {
307  PRECONDITION(entry, "bad arguments");
308  if (updateFPLength) {
309  unsigned int fpl = this->getFPLength();
310  entry->setBitId(fpl);
311  fpl++;
312  this->setFPLength(fpl);
313  }
314  unsigned int eid = static_cast<unsigned int>(
315  boost::add_vertex(EntryProperty(entry), d_graph));
316  orderType etype = entry->getOrder();
317  // REVIEW: this initialization is not required: the STL map, in
318  // theory, will create a new object when operator[] is called
319  // for a new item
320  if (d_orderMap.find(etype) == d_orderMap.end()) {
321  RDKit::INT_VECT nets;
322  d_orderMap[etype] = nets;
323  }
324  d_orderMap[etype].push_back(eid);
325  return eid;
326  }
327 
328  //------------------------------------
329  //! adds an edge between two entries in the catalog
330  /*!
331  Since we are using a bidirectional graph - the order in
332  which the ids are supplied here makes a difference
333 
334  \param id1 index of the edge's beginning
335  \param id2 index of the edge's end
336 
337  */
338  void addEdge(unsigned int id1, unsigned int id2) {
339  unsigned int nents = getNumEntries();
340  URANGE_CHECK(id1, nents);
341  URANGE_CHECK(id2, nents);
342  // FIX: if we boost::setS for the edgeList BGL will
343  // do the checking for duplicity (parallel edges)
344  // But for reasons unknown setS results in compile
345  // errors while using adjacent_vertices.
346  typename CAT_GRAPH_TRAITS::edge_descriptor edge;
347  bool found;
348  boost::tie(edge, found) = boost::edge(boost::vertex(id1, d_graph),
349  boost::vertex(id2, d_graph), d_graph);
350  if (!found) {
351  boost::add_edge(id1, id2, d_graph);
352  }
353  }
354 
355  //------------------------------------
356  //! returns a pointer to our entry with a particular index
357  const entryType *getEntryWithIdx(unsigned int idx) const override {
358  URANGE_CHECK(idx, getNumEntries());
359  int vd = static_cast<int>(boost::vertex(idx, d_graph));
360  typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
361  pMap = boost::get(vertex_entry_t(), d_graph);
362  return pMap[vd];
363  }
364 
365  //------------------------------------
366  //! returns a pointer to our entry with a particular bit ID
367  const entryType *getEntryWithBitId(unsigned int idx) const {
368  URANGE_CHECK(idx, this->getFPLength());
369  typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
370  pMap = boost::get(vertex_entry_t(), d_graph);
371  const entryType *res = nullptr;
372  for (unsigned int i = idx; i < this->getNumEntries(); i++) {
373  const entryType *e = pMap[i];
374  if (e->getBitId() == static_cast<int>(idx)) {
375  res = e;
376  break;
377  }
378  }
379  return res;
380  }
381 
382  //------------------------------------
383  //! returns the index of the entry with a particular bit ID
384  int getIdOfEntryWithBitId(unsigned int idx) const {
385  URANGE_CHECK(idx, this->getFPLength());
386  typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
387  pMap = boost::get(vertex_entry_t(), d_graph);
388  int res = -1;
389  for (unsigned int i = idx; i < this->getNumEntries(); i++) {
390  const entryType *e = pMap[i];
391  if (static_cast<unsigned int>(e->getBitId()) == idx) {
392  res = i;
393  break;
394  }
395  }
396  return res;
397  }
398 
399  //------------------------------------
400  //! returns a list of the indices of entries below the one passed in
401  RDKit::INT_VECT getDownEntryList(unsigned int idx) const {
402  RDKit::INT_VECT res;
403  DOWN_ENT_ITER nbrIdx, endIdx;
404  boost::tie(nbrIdx, endIdx) = boost::adjacent_vertices(idx, d_graph);
405  while (nbrIdx != endIdx) {
406  res.push_back(static_cast<int>(*nbrIdx));
407  nbrIdx++;
408  }
409  // std::cout << res.size() << "\n";
410  return res;
411  }
412 
413  //------------------------------------
414  //! returns a list of the indices that have a particular order
415  const RDKit::INT_VECT &getEntriesOfOrder(orderType ord) {
416  return d_orderMap[ord];
417  }
418 
419  //------------------------------------
420  //! returns a list of the indices that have a particular order
421  /*!
422  \overload
423  */
424  const RDKit::INT_VECT &getEntriesOfOrder(orderType ord) const {
425  typename std::map<orderType, RDKit::INT_VECT>::const_iterator elem;
426  elem = d_orderMap.find(ord);
428  elem != d_orderMap.end(),
429  " catalog does not contain any entries of the order specified");
430  return elem->second;
431  }
432 
433  private:
434  // graphs that store the entries in the catalog in a hierarchical manner
435  CatalogGraph d_graph;
436  // a map that maps the order type of entries in the catalog to
437  // a vector of vertex indices in the graphs above
438  // e.g. for a catalog with molecular fragments, the order of a fragment can
439  // simply be the number of bond in it. The list this oder maps to is all the
440  // vertex ids of these fragment in the catalog that have this many bonds in
441  // them
442  std::map<orderType, RDKit::INT_VECT> d_orderMap;
443 
444  //------------------------------------
445  //! clear any memory that we've used
446  void destroy() {
447  ENT_ITER_PAIR entItP = boost::vertices(d_graph);
448  typename boost::property_map<CatalogGraph, vertex_entry_t>::type pMap =
449  boost::get(vertex_entry_t(), d_graph);
450  while (entItP.first != entItP.second) {
451  delete pMap[*(entItP.first++)];
452  }
453  }
454 };
455 
456 } // namespace RDCatalog
457 
458 #endif
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:101
#define URANGE_CHECK(x, hi)
Definition: Invariant.h:142
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
abstract base class for a catalog object
Definition: Catalog.h:40
virtual std::string Serialize() const =0
return a serialized form of the Catalog as an std::string
virtual unsigned int addEntry(entryType *entry, bool updateFPLength=true)=0
adds an entry to the catalog
virtual const entryType * getEntryWithIdx(unsigned int idx) const =0
returns a particular entry in the Catalog
unsigned int getFPLength() const
returns the length of our fingerprint
Definition: Catalog.h:77
virtual void setCatalogParams(const paramType *params)
sets our parameters by copying the params argument
Definition: Catalog.h:85
entryType entryType_t
Definition: Catalog.h:42
virtual unsigned int getNumEntries() const =0
returns the number of entries
virtual ~Catalog()
Definition: Catalog.h:49
paramType * dp_cParams
our params object
Definition: Catalog.h:110
unsigned int d_fpLength
the length of our fingerprint
Definition: Catalog.h:109
void setFPLength(unsigned int val)
sets our fingerprint length
Definition: Catalog.h:81
paramType paramType_t
Definition: Catalog.h:43
const paramType * getCatalogParams() const
returns a pointer to our parameters
Definition: Catalog.h:101
A Catalog with a hierarchical structure.
Definition: Catalog.h:135
boost::graph_traits< CatalogGraph > CAT_GRAPH_TRAITS
Definition: Catalog.h:155
const RDKit::INT_VECT & getEntriesOfOrder(orderType ord) const
returns a list of the indices that have a particular order
Definition: Catalog.h:424
CAT_GRAPH_TRAITS::vertex_iterator VER_ITER
Definition: Catalog.h:156
CAT_GRAPH_TRAITS::adjacency_iterator DOWN_ENT_ITER
Definition: Catalog.h:158
HierarchCatalog(const paramType *params)
Construct by making a copy of the input params object.
Definition: Catalog.h:166
RDKit::INT_VECT getDownEntryList(unsigned int idx) const
returns a list of the indices of entries below the one passed in
Definition: Catalog.h:401
int getIdOfEntryWithBitId(unsigned int idx) const
returns the index of the entry with a particular bit ID
Definition: Catalog.h:384
const entryType * getEntryWithBitId(unsigned int idx) const
returns a pointer to our entry with a particular bit ID
Definition: Catalog.h:367
unsigned int getNumEntries() const override
returns the number of entries
Definition: Catalog.h:282
std::string Serialize() const override
serializes this object and returns the resulting pickle
Definition: Catalog.h:225
const entryType * getEntryWithIdx(unsigned int idx) const override
returns a pointer to our entry with a particular index
Definition: Catalog.h:357
void initFromStream(std::istream &ss)
fills the contents of this object from a stream containing a pickle
Definition: Catalog.h:234
HierarchCatalog(const std::string &pickle)
Construct from a pickle (a serialized form of the HierarchCatalog)
Definition: Catalog.h:172
boost::adjacency_list< boost::vecS, boost::vecS, boost::bidirectionalS, EntryProperty > CatalogGraph
the type of the graph itself:
Definition: Catalog.h:153
std::pair< VER_ITER, VER_ITER > ENT_ITER_PAIR
Definition: Catalog.h:157
void initFromString(const std::string &text)
fills the contents of this object from a string containing a pickle
Definition: Catalog.h:288
~HierarchCatalog() override
Definition: Catalog.h:175
boost::property< vertex_entry_t, entryType * > EntryProperty
Definition: Catalog.h:144
std::pair< DOWN_ENT_ITER, DOWN_ENT_ITER > DOWN_ENT_ITER_PAIR
Definition: Catalog.h:159
const RDKit::INT_VECT & getEntriesOfOrder(orderType ord)
returns a list of the indices that have a particular order
Definition: Catalog.h:415
unsigned int addEntry(entryType *entry, bool updateFPLength=true) override
add a new entry to the catalog
Definition: Catalog.h:306
void addEdge(unsigned int id1, unsigned int id2)
adds an edge between two entries in the catalog
Definition: Catalog.h:338
void toStream(std::ostream &ss) const
serializes this object to a stream
Definition: Catalog.h:179
const int endianId
Definition: Catalog.h:35
const int versionMinor
Definition: Catalog.h:33
const int versionMajor
Definition: Catalog.h:32
const int versionPatch
Definition: Catalog.h:34
RDKIT_CHEMREACTIONS_EXPORT void pickle(const boost::shared_ptr< EnumerationStrategyBase > &enumerator, std::ostream &ss)
pickles a EnumerationStrategy and adds the results to a stream ss
std::vector< int > INT_VECT
Definition: types.h:279
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
Definition: StreamOps.h:282
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream
Definition: StreamOps.h:260
used by the BGL to set up the node properties in our graph
Definition: Catalog.h:140
boost::vertex_property_tag kind
Definition: Catalog.h:142
@ num
Definition: Catalog.h:141