RDKit
Open-source cheminformatics and machine learning.
Subgraphs.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2003-2022 Greg Landrum and other RDKit contributors
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 /*! \file Subgraphs.h
12 
13  \brief functionality for finding subgraphs and paths in molecules
14 
15  Difference between _subgraphs_ and _paths_ :
16  Subgraphs are potentially branched, whereas paths (in our
17  terminology at least) cannot be. So, the following graph:
18 \verbatim
19  C--0--C--1--C--3--C
20        |
21        2
22        |
23        C
24 \endverbatim
25  has 3 _subgraphs_ of length 3: (0,1,2),(0,1,3),(2,1,3)
26  but only 2 _paths_ of length 3: (0,1,3),(2,1,3)
27 */
28 #include <RDGeneral/export.h>
29 #ifndef RD_SUBGRAPHS_H
30 #define RD_SUBGRAPHS_H
31 
32 #include <vector>
33 #include <list>
34 #include <map>
35 #include <unordered_map>
36 
37 namespace RDKit {
38 class ROMol;
39 // NOTE: before replacing the defn of PATH_TYPE: be aware that
40 // we do occasionally use reverse iterators on these things, so
41 // replacing with a slist (singly-linked list) would probably be a bad idea.
42 typedef std::vector<int> PATH_TYPE;  // one path/subgraph: a sequence of bond indices (or atom indices, for paths found with useBonds unset)
43 typedef std::list<PATH_TYPE> PATH_LIST;  // a list of paths/subgraphs
44 typedef PATH_LIST::const_iterator PATH_LIST_CI;  // read-only iterator over a PATH_LIST
45 
46 typedef std::map<int, PATH_LIST> INT_PATH_LIST_MAP;  // int -> PATH_LIST; documented for findAllSubgraphsOfLengthsMtoN as subgraph size -> list of paths
47 typedef INT_PATH_LIST_MAP::const_iterator INT_PATH_LIST_MAP_CI;  // read-only iterator over an INT_PATH_LIST_MAP
48 typedef INT_PATH_LIST_MAP::iterator INT_PATH_LIST_MAP_I;  // mutable iterator over an INT_PATH_LIST_MAP
49 
50 // --- --- --- --- --- --- --- --- --- --- --- --- ---
51 //
52 //
53 // --- --- --- --- --- --- --- --- --- --- --- --- ---
54 
55 //! \brief find all bond subgraphs in a range of sizes
56 /*!
57  * \param mol - the molecule to be considered
58  * \param lowerLen - the minimum subgraph size (number of bonds) to find
59  * \param upperLen - the maximum subgraph size (number of bonds) to find
60  * \param useHs - if set, hydrogens in the graph will be considered
61  * eligible to be in subgraphs. NOTE: this will not add
62  * Hs to the graph.
63  * \param rootedAtAtom - if non-negative, only subgraphs that start at
64  * this atom will be returned.
65  *
66  * The result is a map from subgraph size -> list of paths
67  * (i.e. list of lists of bond indices)
68  */
69 RDKIT_SUBGRAPHS_EXPORT INT_PATH_LIST_MAP findAllSubgraphsOfLengthsMtoN(
70  const ROMol &mol, unsigned int lowerLen, unsigned int upperLen,
71  bool useHs = false, int rootedAtAtom = -1);
72 
73 //! \brief find all bond subgraphs of a particular size
74 /*!
75  * \param mol - the molecule to be considered
76  * \param targetLen - the length (number of bonds) of the subgraphs to be returned
77  * \param useHs - if set, hydrogens in the graph will be considered
78  * eligible to be in subgraphs. NOTE: this will not add
79  * Hs to the graph.
80  * \param rootedAtAtom - if non-negative, only subgraphs that start at
81  * this atom will be returned.
82  *
83  *
84  * The result is a list of paths (i.e. list of lists of bond indices)
85  */
86 RDKIT_SUBGRAPHS_EXPORT PATH_LIST
87 findAllSubgraphsOfLengthN(const ROMol &mol, unsigned int targetLen,
88  bool useHs = false, int rootedAtAtom = -1);
89 
90 //! \brief find unique bond subgraphs of a particular size
91 /*!
92  * \param mol - the molecule to be considered
93  * \param targetLen - the length (number of bonds) of the subgraphs to be returned
94  * \param useHs - if set, hydrogens in the graph will be considered
95  * eligible to be in subgraphs. NOTE: this will not add
96  * Hs to the graph.
97  * \param useBO - if set, bond orders will be considered when uniquifying
98  * the paths
99  * \param rootedAtAtom - if non-negative, only subgraphs that start at
100  * this atom will be returned.
101  *
102  * The result is a list of paths (i.e. list of lists of bond indices)
103  */
104 RDKIT_SUBGRAPHS_EXPORT PATH_LIST findUniqueSubgraphsOfLengthN(
105  const ROMol &mol, unsigned int targetLen, bool useHs = false,
106  bool useBO = true, int rootedAtAtom = -1);
107 //! \brief find all paths of a particular size
108 /*!
109  * \param mol - the molecule to be considered
110  * \param targetLen - the length of the paths to be returned
111  * \param useBonds - if set, the path indices will be bond indices,
112  * not atom indices
113  * \param useHs - if set, hydrogens in the graph will be considered
114  * eligible to be in paths. NOTE: this will not add
115  * Hs to the graph.
116  * \param rootedAtAtom - if non-negative, only paths that start at
117  * this atom will be returned.
118  * \param onlyShortestPaths - if set then only paths which are <= the shortest
119  * path between the begin and end atoms will be included in the results
120  *
121  * The result is a list of paths (i.e. list of lists of bond indices, or of atom indices if useBonds is not set).
122  * findAllPathsOfLengthsMtoN() takes the same options with lowerLen/upperLen in place of targetLen and returns an INT_PATH_LIST_MAP (presumably path size -> list of paths, as for findAllSubgraphsOfLengthsMtoN - TODO confirm).
123  */
124 RDKIT_SUBGRAPHS_EXPORT PATH_LIST findAllPathsOfLengthN(
125  const ROMol &mol, unsigned int targetLen, bool useBonds = true,
126  bool useHs = false, int rootedAtAtom = -1, bool onlyShortestPaths = false);
127 RDKIT_SUBGRAPHS_EXPORT INT_PATH_LIST_MAP findAllPathsOfLengthsMtoN(
128  const ROMol &mol, unsigned int lowerLen, unsigned int upperLen,
129  bool useBonds = true, bool useHs = false, int rootedAtAtom = -1,
130  bool onlyShortestPaths = false);
131 
132 //! \brief Find bond subgraphs of a particular radius around an atom.
133 //! Return empty result if there is no bond at the requested radius.
134 /*!
135  * \param mol - the molecule to be considered
136  * \param radius - the radius of the subgraphs to be considered
137  * \param rootedAtAtom - the atom to consider
138  * \param useHs - if set, hydrogens in the graph will be considered
139  * eligible to be in paths. NOTE: this will not add
140  * Hs to the graph.
141  * \param enforceSize - If false, all the bonds within the requested radius
142  * (<= radius) are collected. Otherwise, at least one bond
143  * located at the requested radius must be found and added.
144  * \param atomMap - Optional: If provided, it will record the minimum distance of each atom from the rooted atom
145  * (starting with 0 for the rooted atom), as pairs of the atom ID and the distance.
146  *
147  * The result is a path (a vector of bond indices)
148  */
149 RDKIT_SUBGRAPHS_EXPORT PATH_TYPE findAtomEnvironmentOfRadiusN(
150  const ROMol &mol, unsigned int radius, unsigned int rootedAtAtom,
151  bool useHs = false, bool enforceSize = true,
152  std::unordered_map<unsigned int, unsigned int> *atomMap = nullptr);
153 
154 } // namespace RDKit
155 
156 #endif
#define RDKIT_SUBGRAPHS_EXPORT
Definition: export.h:473
Std stuff.
Definition: Abbreviations.h:19
RDKIT_SUBGRAPHS_EXPORT PATH_LIST findUniqueSubgraphsOfLengthN(const ROMol &mol, unsigned int targetLen, bool useHs=false, bool useBO=true, int rootedAtAtom=-1)
find unique bond subgraphs of a particular size
RDKIT_SUBGRAPHS_EXPORT PATH_LIST findAllPathsOfLengthN(const ROMol &mol, unsigned int targetLen, bool useBonds=true, bool useHs=false, int rootedAtAtom=-1, bool onlyShortestPaths=false)
find all paths of a particular size
std::list< PATH_TYPE > PATH_LIST
Definition: Subgraphs.h:43
RDKIT_SUBGRAPHS_EXPORT PATH_TYPE findAtomEnvironmentOfRadiusN(const ROMol &mol, unsigned int radius, unsigned int rootedAtAtom, bool useHs=false, bool enforceSize=true, std::unordered_map< unsigned int, unsigned int > *atomMap=nullptr)
Find bond subgraphs of a particular radius around an atom. Return empty result if there is no bond at...
std::vector< int > PATH_TYPE
Definition: Subgraphs.h:42
RDKIT_SUBGRAPHS_EXPORT PATH_LIST findAllSubgraphsOfLengthN(const ROMol &mol, unsigned int targetLen, bool useHs=false, int rootedAtAtom=-1)
find all bond subgraphs of a particular size
std::map< int, PATH_LIST > INT_PATH_LIST_MAP
Definition: Subgraphs.h:46
RDKIT_SUBGRAPHS_EXPORT INT_PATH_LIST_MAP findAllPathsOfLengthsMtoN(const ROMol &mol, unsigned int lowerLen, unsigned int upperLen, bool useBonds=true, bool useHs=false, int rootedAtAtom=-1, bool onlyShortestPaths=false)
INT_PATH_LIST_MAP::iterator INT_PATH_LIST_MAP_I
Definition: Subgraphs.h:48
INT_PATH_LIST_MAP::const_iterator INT_PATH_LIST_MAP_CI
Definition: Subgraphs.h:47
RDKIT_SUBGRAPHS_EXPORT INT_PATH_LIST_MAP findAllSubgraphsOfLengthsMtoN(const ROMol &mol, unsigned int lowerLen, unsigned int upperLen, bool useHs=false, int rootedAtAtom=-1)
find all bond subgraphs in a range of sizes
PATH_LIST::const_iterator PATH_LIST_CI
Definition: Subgraphs.h:44