RDKit
Open-source cheminformatics and machine learning.
RGroupDecomp.h
Go to the documentation of this file.
1//
2// Copyright (c) 2017-2021, Novartis Institutes for BioMedical Research Inc.
3// and other RDKit contributors
4//
5// @@ All Rights Reserved @@
6// This file is part of the RDKit.
7// The contents are covered by the terms of the BSD license
8// which is included in the file license.txt, found at the root
9// of the RDKit source tree.
10//
11#include <RDGeneral/export.h>
12#ifndef RDKIT_RGROUPDECOMP_H
13#define RDKIT_RGROUPDECOMP_H
14
15#include "../RDKitBase.h"
17#include <chrono>
18
19namespace RDKit {
20
21//! Compute the isomorphic degenerative points in the
22//! molecule. These are points that are symmetrically
23//! equivalent.
24/*!
25 \param mol Molecule to compute the degenerative points
26
27 \return the set of degenerative points (set<unsigned int>)
28*/
29
30typedef enum {
36 DummyAtomLabels = 0x20, // These are rgroups but will get relabelled
37 AutoDetect = 0xFF,
39
40typedef enum {
41 Greedy = 0x01,
43 Exhaustive = 0x04, // not really useful for large sets
45 GA = 0x10,
47
48typedef enum {
49 AtomMap = 0x01,
50 Isotope = 0x02,
51 MDLRGroup = 0x04,
53
54typedef enum {
55 // DEPRECATED, remove the following line in release 2021.03
56 None = 0x0,
58 MCS = 0x01,
60
61typedef enum {
62 Match = 0x1,
65
// Success flag of the result; declared const, so it is fixed at construction.
67 const bool success;
// Score carried with the result; declared const, so it is fixed at
// construction.
68 const double score;
//! Build a result by copying the given success flag and score into the
//! members of the same names.
69 RGroupDecompositionProcessResult(const bool success, const double score)
70 : success(success), score(score) {}
71};
72
73struct RGroupMatch;
74
76 unsigned int labels = AutoDetect;
77 unsigned int matchingStrategy = GreedyChunks;
78 unsigned int scoreMethod = Match;
79 unsigned int rgroupLabelling = AtomMap | MDLRGroup;
80 unsigned int alignment = MCS;
81
82 unsigned int chunkSize = 5;
83 //! only allow rgroup decomposition at the specified rgroups
84 bool onlyMatchAtRGroups = false;
85 //! remove all user-defined rgroups that only have hydrogens
86 bool removeAllHydrogenRGroups = true;
87 //! remove all user-defined rgroups that only have hydrogens,
88 //! and also remove the corresponding labels from the core
89 bool removeAllHydrogenRGroupsAndLabels = true;
90 //! remove all hydrogens from the output molecules
91 bool removeHydrogensPostMatch = true;
92 //! allow labelled Rgroups of degree 2 or more
93 bool allowNonTerminalRGroups = false;
94 // unlabelled core atoms can have multiple rgroups
95 bool allowMultipleRGroupsOnUnlabelled = false;
96
97 double timeout = -1.0; ///< timeout in seconds. <=0 indicates no timeout
98
99 // Determine how to assign the rgroup labels from the given core
100 unsigned int autoGetLabels(const RWMol &);
101
102 // Prepare the core for substructure searching and rgroup assignment
103 bool prepareCore(RWMol &, const RWMol *alignCore);
104
105 // Add r groups to unlabelled atoms if allowMultipleRGroupsOnUnlabelled is set
107
108 // Parameters specific to GA
109
110 // GA population size or -1 to use best guess
111 int gaPopulationSize = -1;
112 // GA maximum number of operations or -1 to use best guess
113 int gaMaximumOperations = -1;
114 // GA number of operations permitted without improvement before exiting (-1
115 // for best guess)
116 int gaNumberOperationsWithoutImprovement = -1;
117 // GA random number seed (-1 for default, -2 for random seed)
118 int gaRandomSeed = -1;
119 // Number of runs
120 int gaNumberRuns = 1;
121 // Sequential or parallel runs?
122#ifdef RDK_BUILD_THREADSAFE_SSS
123 bool gaParallelRuns = true;
124#else
125 bool gaParallelRuns = false;
126#endif
127 // Controls the way substructure matching with the core is done
129
//! Default constructor: sets substructmatchParams.useChirality to true.
//! The members declared above with in-class initializers keep those values.
130 RGroupDecompositionParameters() { substructmatchParams.useChirality = true; }
131
132 private:
133 int indexOffset{-1};
134 void checkNonTerminal(const Atom &atom) const;
135};
136
137typedef std::map<std::string, ROMOL_SPTR> RGroupRow;
138typedef std::vector<ROMOL_SPTR> RGroupColumn;
139
140typedef std::vector<RGroupRow> RGroupRows;
141typedef std::map<std::string, RGroupColumn> RGroupColumns;
142
144 public:
//! Build the label table from \c mapping.
/*!
  One entry is created per pair in \c mapping, keyed by the pair's *value*
  (rl.second). Each entry holds two flags:
    - first:  the "is used" flag, initialised to false
    - second: the "user defined" flag, true when the pair's key (rl.first)
              is strictly positive
  If several pairs share the same value, the last one visited overwrites the
  earlier entry.
*/
145 UsedLabelMap(const std::map<int, int> &mapping) {
146 for (const auto &rl : mapping) {
147 d_map[rl.second] = std::make_pair(false, (rl.first > 0));
148 }
149 }
//! \return true when \c label has an entry in the table, false otherwise.
150 bool has(int label) const { return d_map.find(label) != d_map.end(); }
//! \return the "is used" flag (first element of the entry) for \c label.
//! Uses std::map::at, so an unknown label throws std::out_of_range.
151 bool getIsUsed(int label) const { return d_map.at(label).first; }
//! Set the "is used" flag (first element of the entry) for \c label to true.
//! Uses std::map::operator[], so an unknown label is inserted rather than
//! rejected; the inserted entry's second flag is value-initialised to false.
152 void setIsUsed(int label) { d_map[label].first = true; }
//! \return the "user defined" flag (second element of the entry) for
//! \c label. Uses std::map::at, so an unknown label throws std::out_of_range.
153 bool isUserDefined(int label) const { return d_map.at(label).second; }
154
155 private:
156 std::map<int, std::pair<bool, bool>> d_map;
157};
158
159struct RGroupDecompData;
161 private:
162 RGroupDecompData *data; // implementation details
163 RGroupDecomposition(const RGroupDecomposition &); // no copy construct
164 RGroupDecomposition &operator=(
165 const RGroupDecomposition &); // Prevent assignment
166 RWMOL_SPTR outputCoreMolecule(const RGroupMatch &match,
167 const UsedLabelMap &usedRGroupMap) const;
168 std::map<int, bool> getBlankRGroupMap() const;
169
170 public:
172 const RGroupDecompositionParameters &params =
174 RGroupDecomposition(const std::vector<ROMOL_SPTR> &cores,
175 const RGroupDecompositionParameters &params =
177
179
180 //! Returns the index of the added molecule in the RGroupDecomposition
181 /// or a negative error code
182 /// :param mol: Molecule to add to the decomposition
183 /// :result: index of the molecule or
184 /// -1 if none of the core matches
185 /// -2 if the matched molecule has no sidechains, i.e. is the
186 /// same as the scaffold
187 int add(const ROMol &mol);
189 bool process();
190
192 //! return the current group labels
193 std::vector<std::string> getRGroupLabels() const;
194
195 //! return rgroups in row order group[row][attachment_point] = ROMol
197 //! return rgroups in column order group[attachment_point][row] = ROMol
199};
200
202 const std::vector<ROMOL_SPTR> &cores, const std::vector<ROMOL_SPTR> &mols,
203 RGroupRows &rows, std::vector<unsigned int> *unmatched = nullptr,
204 const RGroupDecompositionParameters &options =
206
208 const std::vector<ROMOL_SPTR> &cores, const std::vector<ROMOL_SPTR> &mols,
209 RGroupColumns &columns, std::vector<unsigned int> *unmatched = nullptr,
210 const RGroupDecompositionParameters &options =
212
//! Report whether a time budget measured on std::chrono::steady_clock has
//! been used up.
/*!
  \param t0 instant at which the timed operation started
  \param timeout budget in seconds; a value <= 0 disables the check
  \param throwOnTimeout when true (the default) an exhausted budget is
         signalled by throwing instead of through the return value

  \return false when the check is disabled or fewer than \c timeout seconds
          have elapsed since \c t0; true when at least \c timeout seconds have
          elapsed and \c throwOnTimeout is false

  \throws std::runtime_error with the message "operation timed out" when at
          least \c timeout seconds have elapsed and \c throwOnTimeout is true

  NOTE(review): std::runtime_error is declared in <stdexcept>, which the
  visible include list of this header does not name - confirm that it is
  pulled in transitively.
*/
inline bool checkForTimeout(const std::chrono::steady_clock::time_point &t0,
                            double timeout, bool throwOnTimeout = true) {
  // A non-positive budget means "no timeout": skip reading the clock entirely.
  if (timeout <= 0) {
    return false;
  }
  auto t1 = std::chrono::steady_clock::now();
  // duration<double> with the default period counts (fractional) seconds, the
  // same unit as `timeout`.
  std::chrono::duration<double> elapsed = t1 - t0;
  if (elapsed.count() >= timeout) {
    if (throwOnTimeout) {
      throw std::runtime_error("operation timed out");
    }
    return true;
  }
  return false;
}
228
229} // namespace RDKit
230
231#endif
RGroupRows getRGroupsAsRows() const
return rgroups in row order group[row][attachment_point] = ROMol
RGroupDecomposition(const std::vector< ROMOL_SPTR > &cores, const RGroupDecompositionParameters &params=RGroupDecompositionParameters())
const RGroupDecompositionParameters & params() const
RGroupColumns getRGroupsAsColumns() const
return rgroups in column order group[attachment_point][row] = ROMol
RGroupDecomposition(const ROMol &core, const RGroupDecompositionParameters &params=RGroupDecompositionParameters())
int add(const ROMol &mol)
RGroupDecompositionProcessResult processAndScore()
std::vector< std::string > getRGroupLabels() const
return the current group labels
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
void setIsUsed(int label)
Definition: RGroupDecomp.h:152
bool getIsUsed(int label) const
Definition: RGroupDecomp.h:151
bool isUserDefined(int label) const
Definition: RGroupDecomp.h:153
UsedLabelMap(const std::map< int, int > &mapping)
Definition: RGroupDecomp.h:145
bool has(int label) const
Definition: RGroupDecomp.h:150
#define RDKIT_RGROUPDECOMPOSITION_EXPORT
Definition: export.h:401
Std stuff.
Definition: Abbreviations.h:19
@ FingerprintVariance
Definition: RGroupDecomp.h:63
RGroupCoreAlignment
Definition: RGroupDecomp.h:54
@ NoAlignment
Definition: RGroupDecomp.h:57
RGroupMatching
Definition: RGroupDecomp.h:40
@ NoSymmetrization
Definition: RGroupDecomp.h:44
@ Greedy
Definition: RGroupDecomp.h:41
@ Exhaustive
Definition: RGroupDecomp.h:43
@ GreedyChunks
Definition: RGroupDecomp.h:42
std::map< std::string, ROMOL_SPTR > RGroupRow
Definition: RGroupDecomp.h:137
std::vector< ROMOL_SPTR > RGroupColumn
Definition: RGroupDecomp.h:138
std::map< std::string, RGroupColumn > RGroupColumns
Definition: RGroupDecomp.h:141
RGroupLabels
Definition: RGroupDecomp.h:30
@ MDLRGroupLabels
Definition: RGroupDecomp.h:35
@ AtomMapLabels
Definition: RGroupDecomp.h:32
@ AtomIndexLabels
Definition: RGroupDecomp.h:33
@ RelabelDuplicateLabels
Definition: RGroupDecomp.h:34
@ AutoDetect
Definition: RGroupDecomp.h:37
@ DummyAtomLabels
Definition: RGroupDecomp.h:36
@ IsotopeLabels
Definition: RGroupDecomp.h:31
RGroupLabelling
Definition: RGroupDecomp.h:48
@ MDLRGroup
Definition: RGroupDecomp.h:51
@ AtomMap
Definition: RGroupDecomp.h:49
@ Isotope
Definition: RGroupDecomp.h:50
bool checkForTimeout(const std::chrono::steady_clock::time_point &t0, double timeout, bool throwOnTimeout=true)
Definition: RGroupDecomp.h:213
RDKIT_RGROUPDECOMPOSITION_EXPORT unsigned int RGroupDecompose(const std::vector< ROMOL_SPTR > &cores, const std::vector< ROMOL_SPTR > &mols, RGroupRows &rows, std::vector< unsigned int > *unmatched=nullptr, const RGroupDecompositionParameters &options=RGroupDecompositionParameters())
std::vector< RGroupRow > RGroupRows
Definition: RGroupDecomp.h:140
boost::shared_ptr< RWMol > RWMOL_SPTR
Definition: RWMol.h:217
void addDummyAtomsToUnlabelledCoreAtoms(RWMol &core)
unsigned int autoGetLabels(const RWMol &)
bool prepareCore(RWMol &, const RWMol *alignCore)
SubstructMatchParameters substructmatchParams
Definition: RGroupDecomp.h:128
RGroupDecompositionProcessResult(const bool success, const double score)
Definition: RGroupDecomp.h:69
RGroupMatch is the decomposition for a single molecule.
Definition: RGroupMatch.h:19