RDKit
Open-source cheminformatics and machine learning.
SubstructLibrarySerialization.h
Go to the documentation of this file.
1 // Copyright (c) 2019, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 // n.b. must be included at the END of SubstructLibrary.h
32 #ifndef RDK_SUBSTRUCT_LIBRARY_SERIALIZATION
33 #define RDK_SUBSTRUCT_LIBRARY_SERIALIZATION
34 
35 #ifdef RDK_USE_BOOST_SERIALIZATION
37 #include <boost/archive/text_oarchive.hpp>
38 #include <boost/archive/text_iarchive.hpp>
39 #include <boost/serialization/vector.hpp>
40 #include <boost/serialization/shared_ptr.hpp>
41 #include <boost/archive/archive_exception.hpp>
43 
44 BOOST_SERIALIZATION_ASSUME_ABSTRACT(RDKit::MolHolderBase)
45 BOOST_SERIALIZATION_ASSUME_ABSTRACT(RDKit::FPHolderBase)
46 
47 namespace boost {
48 namespace serialization {
49 
50 template <class Archive>
51 void serialize(Archive &, RDKit::MolHolderBase &, const unsigned int) {}
52 
53 template <class Archive>
54 void save(Archive &ar, const RDKit::MolHolder &molholder,
55  const unsigned int version) {
56  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
57 
58  if (version < 2) {
59  std::int64_t pkl_count = molholder.getMols().size();
60  ar &pkl_count;
61 
62  for (auto &mol : molholder.getMols()) {
63  std::string pkl;
64  RDKit::MolPickler::pickleMol(*mol.get(), pkl);
65  ar << pkl;
66  }
67  } else {
68  ar &molholder.getMols();
69  }
70 }
71 
72 template <class Archive>
73 void load(Archive &ar, RDKit::MolHolder &molholder,
74  const unsigned int version) {
75  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
76 
77  std::vector<boost::shared_ptr<RDKit::ROMol>> &mols = molholder.getMols();
78  mols.clear();
79 
80  if (version < 2) {
81  std::int64_t pkl_count = -1;
82  ar &pkl_count;
83 
84  for (std::int64_t i = 0; i < pkl_count; ++i) {
85  std::string pkl;
86  ar >> pkl;
87  mols.push_back(boost::make_shared<RDKit::ROMol>(pkl));
88  }
89  } else {
90  ar &mols;
91  }
92 }
93 
94 template <class Archive, class MolHolder>
95 void serialize_strings(Archive &ar, MolHolder &molholder,
96  const unsigned int version) {
97  RDUNUSED_PARAM(version);
98  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
99  ar &molholder.getMols();
100 }
101 
102 template <class Archive>
103 void serialize(Archive &ar, RDKit::CachedMolHolder &molholder,
104  const unsigned int version) {
105  serialize_strings(ar, molholder, version);
106 }
107 
108 template <class Archive>
109 void serialize(Archive &ar, RDKit::CachedSmilesMolHolder &molholder,
110  const unsigned int version) {
111  serialize_strings(ar, molholder, version);
112 }
113 
114 template <class Archive>
115 void serialize(Archive &ar, RDKit::CachedTrustedSmilesMolHolder &molholder,
116  const unsigned int version) {
117  serialize_strings(ar, molholder, version);
118 }
119 
120 template <class Archive>
121 void save(Archive &ar, const RDKit::FPHolderBase &fpholder,
122  const unsigned int version) {
123  RDUNUSED_PARAM(version);
124  std::vector<std::string> pickles;
125  for (auto &fp : fpholder.getFingerprints()) {
126  pickles.push_back(fp->toString());
127  }
128  ar &pickles;
129 }
130 
131 template <class Archive>
132 void load(Archive &ar, RDKit::FPHolderBase &fpholder,
133  const unsigned int version) {
134  RDUNUSED_PARAM(version);
135  std::vector<std::string> pickles;
136  std::vector<ExplicitBitVect *> &fps = fpholder.getFingerprints();
137 
138  ar &pickles;
139  for (size_t i = 0; i < fps.size(); ++i) {
140  delete fps[i];
141  }
142  fps.clear();
143 
144  for (auto &pkl : pickles) {
145  fps.push_back(new ExplicitBitVect(pkl));
146  }
147 }
148 
149 template <class Archive>
150 void serialize(Archive &ar, RDKit::PatternHolder &pattern_holder,
151  const unsigned int version) {
152  RDUNUSED_PARAM(version);
153  ar &boost::serialization::base_object<RDKit::FPHolderBase>(pattern_holder);
154  if (Archive::is_saving::value &&
155  pattern_holder.getNumBits() != RDKit::PatternHolder::defaultNumBits()) {
156  ar &pattern_holder.getNumBits();
157  } else if (Archive::is_loading::value) {
158  try {
159  ar &pattern_holder.getNumBits();
160  } catch (boost::archive::archive_exception &) {
162  }
163  }
164 }
165 
166 template <class Archive>
167 void serialize(Archive &ar, RDKit::TautomerPatternHolder &pattern_holder,
168  const unsigned int version) {
169  RDUNUSED_PARAM(version);
170  ar &boost::serialization::base_object<RDKit::FPHolderBase>(pattern_holder);
171  ar &pattern_holder.getNumBits();
172 }
173 
174 template <class Archive>
175 void serialize(Archive &, RDKit::KeyHolderBase &, const unsigned int) {}
176 
177 template <class Archive>
178 void serialize(Archive &ar, RDKit::KeyFromPropHolder &key_holder,
179  const unsigned int) {
180  ar &boost::serialization::base_object<RDKit::KeyHolderBase>(key_holder);
181  ar &key_holder.getPropName();
182  ar &key_holder.getKeys();
183 }
184 
185 template <class Archive>
186 void registerSubstructLibraryTypes(Archive &ar) {
187  ar.register_type(static_cast<RDKit::MolHolder *>(nullptr));
188  ar.register_type(static_cast<RDKit::CachedMolHolder *>(nullptr));
189  ar.register_type(static_cast<RDKit::CachedSmilesMolHolder *>(nullptr));
190  ar.register_type(static_cast<RDKit::CachedTrustedSmilesMolHolder *>(nullptr));
191  ar.register_type(static_cast<RDKit::PatternHolder *>(nullptr));
192  ar.register_type(static_cast<RDKit::TautomerPatternHolder *>(nullptr));
193  ar.register_type(static_cast<RDKit::KeyFromPropHolder *>(nullptr));
194 }
195 
196 template <class Archive>
197 void save(Archive &ar, const RDKit::SubstructLibrary &slib,
198  const unsigned int version) {
199  RDUNUSED_PARAM(version);
200  registerSubstructLibraryTypes(ar);
201  ar &slib.getSearchOrder();
202  ar &slib.getKeyHolder();
203  ar &slib.getMolHolder();
204  ar &slib.getFpHolder();
205 }
206 
207 template <class Archive>
208 void load(Archive &ar, RDKit::SubstructLibrary &slib,
209  const unsigned int version) {
210  RDUNUSED_PARAM(version);
211  registerSubstructLibraryTypes(ar);
212  if (version > 1) {
213  ar &slib.getSearchOrder();
214  ar &slib.getKeyHolder();
215  }
216  ar &slib.getMolHolder();
217  ar &slib.getFpHolder();
218  slib.resetHolders();
219 }
220 
221 } // end namespace serialization
222 } // end namespace boost
223 
224 BOOST_CLASS_VERSION(RDKit::MolHolder, 2);
225 BOOST_CLASS_VERSION(RDKit::CachedMolHolder, 1);
226 BOOST_CLASS_VERSION(RDKit::CachedSmilesMolHolder, 1);
227 BOOST_CLASS_VERSION(RDKit::CachedTrustedSmilesMolHolder, 1);
228 BOOST_CLASS_VERSION(RDKit::PatternHolder, 1);
229 BOOST_CLASS_VERSION(RDKit::TautomerPatternHolder, 1);
230 BOOST_CLASS_VERSION(RDKit::SubstructLibrary, 2);
231 
232 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::MolHolder);
233 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::FPHolderBase);
234 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::SubstructLibrary);
235 
236 #endif
237 #endif
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:196
a class for bit vectors that are densely occupied
Concrete class that holds binary cached molecules in memory.
Concrete class that holds smiles strings in memory.
Concrete class that holds trusted smiles strings in memory.
Base FPI for the fingerprinter used to rule out impossible matches.
std::vector< ExplicitBitVect * > & getFingerprints()
std::vector< std::string > & getKeys()
Base class API for holding molecules to substructure search.
Concrete class that holds molecules in memory.
std::vector< boost::shared_ptr< ROMol > > & getMols()
static void pickleMol(const ROMol *mol, std::ostream &ss)
pickles a molecule and sends the results to stream ss
static unsigned int defaultNumBits()
const unsigned int & getNumBits() const
Substructure Search a library of molecules.
boost::shared_ptr< MolHolderBase > & getMolHolder()
Get the underlying molecule holder implementation.
boost::shared_ptr< KeyHolderBase > & getKeyHolder()
Get the underlying key holder implementation.
boost::shared_ptr< FPHolderBase > & getFpHolder()
Get the underlying fingerprint holder implementation.
const std::vector< unsigned int > & getSearchOrder() const
void resetHolders()
access required for serialization
Definition: RDLog.h:25