RDKit
Open-source cheminformatics and machine learning.
Abbreviations.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2020 Greg Landrum and T5 Informatics GmbH
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_ABBREVIATIONS_H
12 #define RD_ABBREVIATIONS_H
14 #include <utility>
15 #include <vector>
16 #include <string>
17 #include <memory>
18 
19 namespace RDKit {
20 class ROMol;
21 class RWMol;
22 
23 namespace Abbreviations {
25  std::string label;
26  std::string displayLabel;
27  std::string displayLabelW;
28  std::string smarts;
29  std::shared_ptr<ROMol> mol; //!< optional
30  std::vector<unsigned int> extraAttachAtoms; //!< optional
31  bool operator==(const AbbreviationDefinition& other) const {
32  return label == other.label && displayLabel == other.displayLabel &&
33  displayLabelW == other.displayLabelW && smarts == other.smarts;
34  }
35  bool operator!=(const AbbreviationDefinition& other) const {
36  return !(*this == other);
37  }
38 };
42  AbbreviationMatch(std::vector<std::pair<int, int>> matchArg,
43  AbbreviationDefinition abbrevArg)
44  : match(std::move(matchArg)), abbrev(std::move(abbrevArg)) {}
45  AbbreviationMatch() : match(), abbrev() {}
46  bool operator==(const AbbreviationMatch& other) const {
47  return abbrev == other.abbrev && match == other.match;
48  }
49  bool operator!=(const AbbreviationMatch& other) const {
50  return !(*this == other);
51  }
52 };
53 namespace common_properties {
54 RDKIT_ABBREVIATIONS_EXPORT extern const std::string numDummies;
55 RDKIT_ABBREVIATIONS_EXPORT extern const std::string origAtomMapping;
56 RDKIT_ABBREVIATIONS_EXPORT extern const std::string origBondMapping;
57 } // namespace common_properties
58 namespace Utils {
59 //! returns the default set of abbreviation definitions
60 RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
62 //! returns the default set of linker definitions
63 RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
65 
66 //! parses a string describing abbreviation matches and returns the result
67 /*!
68 
69 \param text the data to be parsed, see below for the format
70 \param removeExtraDummies controls whether or not dummy atoms beyond atom 0 are
71  removed. Set this to true to create abbreviations for linkers
72 \param allowConnectionToDummies allows abbreviations to directly connect to
73  abbreviations. Set this to true for linkers
74 
75 Format of the text data:
76  A series of lines, each of which contains:
77 
78  label SMARTS displayLabel displayLabelW
79 
80  The "displayLabel" and "displayLabelW" fields are optional.
81  Here, label is the label used for the abbreviation,
82  SMARTS is the SMARTS definition of the abbreviation,
83  displayLabel is used in drawings to render the abbreviation, and
84  displayLabelW is the display label used if a bond comes in from the right.
85 
86  Use dummies to indicate attachment points. The assumption is that the first
87  atom is a dummy (one will be added if this is not true) and that the second
88  atom is the surrogate for the rest of the group.
89 
90 */
91 RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
92 parseAbbreviations(const std::string& text, bool removeExtraDummies = false,
93  bool allowConnectionToDummies = false);
94 //! \brief equivalent to calling \c parseAbbreviations(text,true,true)
95 inline std::vector<AbbreviationDefinition> parseLinkers(
96  const std::string& text) {
97  return parseAbbreviations(text, true, true);
98 };
99 } // namespace Utils
100 
101 //! returns all matches for the abbreviations across the molecule
102 /*!
103 
104  \param abbrevs the abbreviations to look for. This list is used in order.
105  \param maxCoverage any abbreviation that covers more than this fraction
106  of the molecule's atoms (not counting dummies) will not be returned.
107 */
108 RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationMatch>
110  const ROMol& mol, const std::vector<AbbreviationDefinition>& abbrevs,
111  double maxCoverage = 0.4);
112 //! applies the abbreviation matches to a molecule, modifying it in place.
113 //! the modified molecule is not sanitized
115  RWMol& mol, const std::vector<AbbreviationMatch>& matches);
116 //! creates "SUP" SubstanceGroups on the molecule describing the abbreviation
118  RWMol& mol, const std::vector<AbbreviationMatch>& matches);
119 //! convenience function for finding and applying abbreviations
120 //! the modified molecule is not sanitized
122  RWMol& mol, const std::vector<AbbreviationDefinition>& abbrevs,
123  double maxCoverage = 0.4, bool sanitize = true);
124 //! convenience function for finding and labeling abbreviations as SUP
125 //! SubstanceGroups
127  RWMol& mol, const std::vector<AbbreviationDefinition>& abbrevs,
128  double maxCoverage = 0.4);
129 //! collapses abbreviation (i.e. "SUP") substance groups
130 //! the modified molecule is not sanitized
132 
133 } // namespace Abbreviations
134 } // namespace RDKit
135 #endif
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
#define RDKIT_ABBREVIATIONS_EXPORT
Definition: export.h:9
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationDefinition > getDefaultAbbreviations()
returns the default set of abbreviation definitions
std::vector< AbbreviationDefinition > parseLinkers(const std::string &text)
equivalent to calling parseAbbreviations(text,true,true)
Definition: Abbreviations.h:95
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationDefinition > parseAbbreviations(const std::string &text, bool removeExtraDummies=false, bool allowConnectionToDummies=false)
parses a string describing abbreviation matches and returns the result
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationDefinition > getDefaultLinkers()
returns the default set of linker definitions
RDKIT_ABBREVIATIONS_EXPORT const std::string origAtomMapping
RDKIT_ABBREVIATIONS_EXPORT const std::string origBondMapping
RDKIT_ABBREVIATIONS_EXPORT const std::string numDummies
RDKIT_ABBREVIATIONS_EXPORT void applyMatches(RWMol &mol, const std::vector< AbbreviationMatch > &matches)
RDKIT_ABBREVIATIONS_EXPORT void condenseAbbreviationSubstanceGroups(RWMol &mol)
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationMatch > findApplicableAbbreviationMatches(const ROMol &mol, const std::vector< AbbreviationDefinition > &abbrevs, double maxCoverage=0.4)
returns all matches for the abbreviations across the molecule
RDKIT_ABBREVIATIONS_EXPORT void labelMolAbbreviations(RWMol &mol, const std::vector< AbbreviationDefinition > &abbrevs, double maxCoverage=0.4)
RDKIT_ABBREVIATIONS_EXPORT void condenseMolAbbreviations(RWMol &mol, const std::vector< AbbreviationDefinition > &abbrevs, double maxCoverage=0.4, bool sanitize=true)
RDKIT_ABBREVIATIONS_EXPORT void labelMatches(RWMol &mol, const std::vector< AbbreviationMatch > &matches)
creates "SUP" SubstanceGroups on the molecule describing the abbreviation
Std stuff.
Definition: Abbreviations.h:19
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching. The format is (queryAtomIdx, molAtomIdx)
std::vector< unsigned int > extraAttachAtoms
optional
Definition: Abbreviations.h:30
bool operator==(const AbbreviationDefinition &other) const
Definition: Abbreviations.h:31
std::shared_ptr< ROMol > mol
optional
Definition: Abbreviations.h:29
bool operator!=(const AbbreviationDefinition &other) const
Definition: Abbreviations.h:35
bool operator!=(const AbbreviationMatch &other) const
Definition: Abbreviations.h:49
AbbreviationMatch(std::vector< std::pair< int, int >> matchArg, AbbreviationDefinition abbrevArg)
Definition: Abbreviations.h:42
bool operator==(const AbbreviationMatch &other) const
Definition: Abbreviations.h:46