RDKit
Open-source cheminformatics and machine learning.
RGroupDecompData.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2017-2022 Novartis Institutes for BioMedical Research and
3 // other RDKit contributors
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #ifndef RGROUP_DECOMP_DATA
12 #define RGROUP_DECOMP_DATA
13 
14 #include "RGroupCore.h"
15 #include "RGroupScore.h"
16 #include "RGroupFingerprintScore.h"
17 #include <vector>
18 #include <map>
19 
20 namespace RDKit {
21 
22 extern const std::string _rgroupInputDummy;
23 
25  // NOTE(review): this comment appears to describe `matches` (declared further
    // down), not `cores`: matches[mol_idx] == vector of potential matches
26  std::map<int, RCore> cores;  // cores keyed by int -- presumably a core index; TODO confirm
27  std::map<std::string, int> newCores; // new "cores" found along the way
29  // this caches the running product of permutations
30  // across calls to process()
31  size_t permutationProduct = 1;
32  // this caches the size of the previous matches vector
33  // such that the size of the current chunk can be inferred
34  size_t previousMatchSize = 0;
35  // the default for Greedy/GreedyChunks is keeping only the best
36  // permutation after each call to process()
37  bool prunePermutations = true;
39 
    // indexed by molecule; each inner vector holds that molecule's potential
    // matches (this is the member the first comment of this list refers to)
40  std::vector<std::vector<RGroupMatch>> matches;
41  std::set<int> labels;  // presumably the R-group labels seen so far; TODO confirm
42  std::vector<size_t> permutation;  // presumably one chosen match index per molecule in `matches`; TODO confirm
43  unsigned int pruneLength = 0U;  // NOTE(review): looks related to prune(); confirm in the implementation
45  std::map<int, std::vector<int>> userLabels;  // int -> list of ints; key/value meaning is not visible here
46 
47  std::vector<int> processedRlabels;  // presumably R labels already handled; TODO confirm
48 
49  std::map<int, int> finalRlabelMapping;  // presumably R label -> final R label; TODO confirm
51 
52  RGroupDecompData(const RWMol &inputCore,  // constructor overload: a single input core
53  RGroupDecompositionParameters inputParams);  // parameters are passed by value
54 
    // constructor overload taking several input cores (vector of ROMOL_SPTR)
55  RGroupDecompData(const std::vector<ROMOL_SPTR> &inputCores,
56  RGroupDecompositionParameters inputParams);
57 
    // takes one core by const reference; presumably registers it in `cores`
    // -- TODO confirm against the implementation
58  void addCore(const ROMol &inputCore);
59 
    // no arguments, no return value; only declared here, so the preparation
    // steps themselves are not visible in this header
60  void prepareCores();
61 
    // setter/getter pair for the integer R label of an atom; how the label is
    // stored on the atom is not visible in this header
62  void setRlabel(Atom *atom, int rlabel);
63 
64  int getRlabel(Atom *atom) const;
65 
    // returns a score (double) for `permutation`; `reset` defaults to true.
    // NOTE(review): presumably works on pruned score data -- confirm against
    // the implementation
66  double scoreFromPrunedData(const std::vector<size_t> &permutation,
67  bool reset = true);
68 
    // NOTE(review): presumably discards non-best permutations (see
    // prunePermutations / pruneLength) -- confirm against the implementation
69  void prune();
70 
71  // Return the RGroups with the current "best" permutation
72  // of matches.
73  std::vector<RGroupMatch> GetCurrentBestPermutation() const;
74 
75  class UsedLabels {  // helper that tracks a set of integer labels
76  public:
77  std::set<int> labels_used;  // the labels recorded so far
78 
    // NOTE(review): presumably records `rlabel` in labels_used and reports
    // whether it was newly added -- confirm against the implementation
79  bool add(int rlabel);
80 
    // NOTE(review): presumably hands out a label not yet in labels_used
    // -- confirm against the implementation
81  int next();
82  };
83 
84  void addCoreUserLabels(const RWMol &core, std::set<int> &userLabels);  // `userLabels` is a non-const ref, so it can be modified
    // NOTE: the `userLabels` parameter above is a std::set<int> and shadows
    // the data member of the same name, which is a std::map<int, std::vector<int>>
85 
    // takes the molecule to edit and a list of atom pairs; the role of each
    // atom within a pair is not visible in this header
86  void addAtoms(RWMol &mol,
87  const std::vector<std::pair<Atom *, Atom *>> &atomsToAdd);
88 
    // returns bool; NOTE(review): presumably true when the dummy was replaced
    // -- confirm against the implementation
89  bool replaceHydrogenCoreDummy(const RGroupMatch &match, RWMol &core,
90  const Atom &atom, const int currentLabel,
91  const int rLabel);
92 
    // `core`, `mappings` and `used_labels` are non-const references and may be
    // updated; `match` is optional (defaults to nullptr)
93  void relabelCore(RWMol &core, std::map<int, int> &mappings,
94  UsedLabels &used_labels, const std::set<int> &indexLabels,
95  const std::map<int, std::vector<int>> &extraAtomRLabels,
96  const RGroupMatch *const match = nullptr);
97 
    // `rgroup` is modifiable, `mappings` is read-only
98  void relabelRGroup(RGroupData &rgroup, const std::map<int, int> &mappings);
99 
100  // relabel the core and sidechains using the specified user labels
101  // if matches exist for non labelled atoms, these are added as well
102  void relabel();
103 
    // const member: returns a score (double) for `permutation`; the optional
    // fingerprintVarianceScoreData pointer defaults to nullptr
104  double score(const std::vector<size_t> &permutation,
105  FingerprintVarianceScoreData *fingerprintVarianceScoreData =
106  nullptr) const;
107 
109  bool finalize = false);
110 };
111 } // namespace RDKit
112 
113 #endif
The class for representing atoms.
Definition: Atom.h:68
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
Std stuff.
Definition: Abbreviations.h:19
const std::string _rgroupInputDummy
const unsigned int EMPTY_CORE_LABEL
Definition: RGroupUtils.h:27
A single rgroup attached to a given core.
Definition: RGroupData.h:22
void relabelCore(RWMol &core, std::map< int, int > &mappings, UsedLabels &used_labels, const std::set< int > &indexLabels, const std::map< int, std::vector< int >> &extraAtomRLabels, const RGroupMatch *const match=nullptr)
std::vector< std::vector< RGroupMatch > > matches
FingerprintVarianceScoreData prunedFingerprintVarianceScoreData
RGroupDecompData(const RWMol &inputCore, RGroupDecompositionParameters inputParams)
double score(const std::vector< size_t > &permutation, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr) const
double scoreFromPrunedData(const std::vector< size_t > &permutation, bool reset=true)
void relabelRGroup(RGroupData &rgroup, const std::map< int, int > &mappings)
std::vector< size_t > permutation
std::map< int, std::vector< int > > userLabels
bool replaceHydrogenCoreDummy(const RGroupMatch &match, RWMol &core, const Atom &atom, const int currentLabel, const int rLabel)
RGroupDecompositionParameters params
std::map< std::string, int > newCores
RGroupDecompositionProcessResult process(bool pruneMatches, bool finalize=false)
std::map< int, RCore > cores
void setRlabel(Atom *atom, int rlabel)
void addCore(const ROMol &inputCore)
void addAtoms(RWMol &mol, const std::vector< std::pair< Atom *, Atom * >> &atomsToAdd)
std::vector< int > processedRlabels
int getRlabel(Atom *atom) const
std::map< int, int > finalRlabelMapping
void addCoreUserLabels(const RWMol &core, std::set< int > &userLabels)
RGroupDecompData(const std::vector< ROMOL_SPTR > &inputCores, RGroupDecompositionParameters inputParams)
std::vector< RGroupMatch > GetCurrentBestPermutation() const
RGroupMatch is the decomposition for a single molecule.
Definition: RGroupMatch.h:19