RDKit
Open-source cheminformatics and machine learning.
ScaffoldNetwork.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2019 Greg Landrum and T5 Informatics GmbH
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_SCAFFOLDNETWORK_H
12 #define RD_SCAFFOLDNETWORK_H
13 
14 #include <vector>
15 #include <map>
16 #include <string>
17 #include <sstream>
18 #include <memory>
19 #include <iostream>
20 
21 #ifdef RDK_USE_BOOST_SERIALIZATION
22 #include <RDGeneral/Invariant.h>
24 #include <boost/archive/text_oarchive.hpp>
25 #include <boost/archive/text_iarchive.hpp>
26 #include <boost/serialization/vector.hpp>
27 #include <boost/serialization/shared_ptr.hpp>
28 #include <boost/serialization/version.hpp>
30 #endif
31 
32 namespace RDKit {
33 class ROMol;
34 class ChemicalReaction;
35 
36 namespace ScaffoldNetwork {
37 
39  bool includeGenericScaffolds =
40  true; ///< include scaffolds with all atoms replaced by dummies
41  bool includeGenericBondScaffolds =
42  false; ///< include scaffolds with all bonds replaced by single bonds
43  bool includeScaffoldsWithoutAttachments =
44  true; ///< remove attachment points from scaffolds and include the result
45  bool includeScaffoldsWithAttachments =
46  true; ///< Include the version of the scaffold with attachment points
47  bool keepOnlyFirstFragment =
48  true; ///< keep only the first fragment from the bond breaking rule
49  bool pruneBeforeFragmenting =
50  true; ///< Do a pruning/flattening step before starting fragmenting
51  bool flattenIsotopes = true; ///< remove isotopes when flattening
52  bool flattenChirality =
53  true; ///< remove chirality and bond stereo when flattening
54  bool flattenKeepLargest =
55  true; ///< keep only the largest fragment when doing flattening
56  bool collectMolCounts = true; ///< keep track of the number of molecules each
57  ///< scaffold was reached from
58 
59  std::vector<std::shared_ptr<ChemicalReaction>>
60  bondBreakersRxns; ///< the reaction(s) used to fragment. Should expect a
61  ///< single reactant and produce two products
63  : ScaffoldNetworkParams{{"[!#0;R:1]-!@[!#0:2]>>[*:1]-[#0].[#0]-[*:2]"}} {}
64  ScaffoldNetworkParams(const std::vector<std::string> &bondBreakersSmarts);
65 };
66 
//! Type tag carried by each NetworkEdge: names the transformation that leads
//! from the edge's begin node to its end node.
//! A default-constructed NetworkEdge uses EdgeType::Initialize.
//! NOTE(review): NetworkEdge::serialize archives this value, so the explicit
//! numeric values presumably need to stay stable - confirm before renumbering.
67 enum class EdgeType {
68  Fragment = 1, ///< molecule -> fragment
69  Generic = 2, ///< molecule -> generic molecule (all atoms are dummies)
70  GenericBond = 3, ///< molecule -> generic bond molecule (all bonds single)
71  RemoveAttachment = 4, ///< molecule -> molecule with no attachment points
72  Initialize = 5 ///< molecule -> flattened molecule
73 };
74 
76  size_t beginIdx;
77  size_t endIdx;
79  NetworkEdge() : beginIdx(0), endIdx(0), type(EdgeType::Initialize) {}
80  NetworkEdge(size_t bi, size_t ei, EdgeType typ)
81  : beginIdx(bi), endIdx(ei), type(typ) {}
83  return (beginIdx == o.beginIdx) && (endIdx == o.endIdx) && (type == o.type);
84  }
86  return (beginIdx != o.beginIdx) || (endIdx != o.endIdx) || (type != o.type);
87  }
88 #ifdef RDK_USE_BOOST_SERIALIZATION
89  private:
90  friend class boost::serialization::access;
//! Boost.Serialization hook for NetworkEdge (reached through the
//! boost::serialization::access friend declaration).
//! Passes beginIdx, endIdx and type to the archive, in that order.
//! \param ar       the archive being processed
//! \param version  class version supplied by the archive; not used here
91  template <class Archive>
92  void serialize(Archive &ar, const unsigned int version) {
93  RDUNUSED_PARAM(version);
94  ar &beginIdx;
95  ar &endIdx;
96  ar &type;
97  }
98 #endif
99 };
100 
102  std::vector<std::string> nodes; ///< SMILES for the scaffolds
103  std::vector<unsigned>
104  counts; ///< number of times each scaffold was encountered
105  std::vector<unsigned>
106  molCounts; ///< number of molecules each scaffold was found in
107  std::vector<NetworkEdge> edges; ///< edges in the network
109 #ifdef RDK_USE_BOOST_SERIALIZATION
//! Constructs a network from its serialized text form.
//! Only available when RDK_USE_BOOST_SERIALIZATION is defined.
//! \param pkl  string holding a boost text archive of a ScaffoldNetwork
//! NOTE(review): nothing here validates pkl; any failure surfaces from the
//! boost archive itself - confirm what callers are expected to catch.
110  ScaffoldNetwork(const std::string &pkl) {
111  std::stringstream iss(pkl);
112  boost::archive::text_iarchive ia(iss);
113  ia >> *this;  // dispatches to serialize() below
114  }
115 
116  private:
117  friend class boost::serialization::access;
//! Boost.Serialization hook for ScaffoldNetwork.
//! Archives nodes, counts, (molCounts), edges in that order. molCounts is
//! only processed when the archive's class version is greater than 0, so
//! version-0 archives are handled without that field.
//! \param ar       the archive being processed
//! \param version  class version supplied by the archive
118  template <class Archive>
119  void serialize(Archive &ar, const unsigned int version) {
// NOTE(review): version is marked unused here but is read a few lines below;
// the marker looks like a leftover and could be dropped.
120  RDUNUSED_PARAM(version);
121  ar &nodes;
122  ar &counts;
123  if (version > 0) {
124  ar &molCounts;
125  }
126  ar &edges;
127  }
128 #endif
129 };
130 
131 //! update an existing ScaffoldNetwork using a set of molecules
/*!
  Declaration only; no definition is visible in this listing.
  \param mols     the molecules to process; T is presumably a container of
                  molecules - confirm against the definition
  \param network  the network to update; taken by non-const reference
  \param params   the parameters to use
*/
132 template <typename T>
133 void updateScaffoldNetwork(const T &mols, ScaffoldNetwork &network,
134  const ScaffoldNetworkParams &params);
135 
136 //! create a new ScaffoldNetwork for a set of molecules
137 template <typename T>
139  const ScaffoldNetworkParams &params) {
140  ScaffoldNetwork res;
141  updateScaffoldNetwork(mols, res, params);
142  return res;
143 }
144 //! allows edge types to output nicely as strings
145 inline std::ostream &operator<<(std::ostream &ostr,
147  switch (e) {
149  ostr << "Fragment";
150  break;
152  ostr << "Generic";
153  break;
155  ostr << "GenericBond";
156  break;
158  ostr << "RemoveAttachment";
159  break;
161  ostr << "Initialize";
162  break;
163  default:
164  ostr << "UNKNOWN";
165  break;
166  }
167  return ostr;
168 }
169 //! allows edges to output nicely as strings
170 inline std::ostream &operator<<(std::ostream &ostr,
172  ostr << "NetworkEdge( " << e.beginIdx << "->" << e.endIdx
173  << ", type:" << e.type << " )";
174  return ostr;
175 }
176 
177 //! returns parameters for constructing scaffold networks using BRICS
178 //! fragmentation
180 
181 } // namespace ScaffoldNetwork
182 } // namespace RDKit
183 
184 #ifdef RDK_USE_BOOST_SERIALIZATION
// Sets the Boost.Serialization class version of ScaffoldNetwork to 1.
// ScaffoldNetwork::serialize processes molCounts only when the version it
// receives is greater than 0.
185 namespace boost {
186 namespace serialization {
187 template <>
188 struct version<RDKit::ScaffoldNetwork::ScaffoldNetwork> {
189  BOOST_STATIC_CONSTANT(int, value = 1);
190 };
191 } // namespace serialization
192 } // namespace boost
193 #endif
194 
195 #endif
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:196
#define RDKIT_SCAFFOLDNETWORK_EXPORT
Definition: export.h:433
ScaffoldNetwork createScaffoldNetwork(const T &mols, const ScaffoldNetworkParams &params)
create a new ScaffoldNetwork for a set of molecules
void updateScaffoldNetwork(const T &mols, ScaffoldNetwork &network, const ScaffoldNetworkParams &params)
update an existing ScaffoldNetwork using a set of molecules
std::ostream & operator<<(std::ostream &ostr, const RDKit::ScaffoldNetwork::NetworkEdge &e)
allows edges to output nicely as strings
@ Initialize
molecule -> flattened molecule
@ Fragment
molecule -> fragment
@ Generic
molecule -> generic molecule (all atoms are dummies)
@ RemoveAttachment
molecule -> molecule with no attachment points
@ GenericBond
molecule -> generic bond molecule (all bonds single)
RDKIT_SCAFFOLDNETWORK_EXPORT ScaffoldNetworkParams getBRICSNetworkParams()
Std stuff.
Definition: Abbreviations.h:19
Definition: RDLog.h:25
bool operator==(const RDKit::ScaffoldNetwork::NetworkEdge &o) const
NetworkEdge(size_t bi, size_t ei, EdgeType typ)
bool operator!=(const RDKit::ScaffoldNetwork::NetworkEdge &o) const
std::vector< std::shared_ptr< ChemicalReaction > > bondBreakersRxns
ScaffoldNetworkParams(const std::vector< std::string > &bondBreakersSmarts)
std::vector< NetworkEdge > edges
edges in the network
std::vector< unsigned > molCounts
number of molecules each scaffold was found in
std::vector< std::string > nodes
SMILES for the scaffolds.
std::vector< unsigned > counts
number of times each scaffold was encountered