RDKit
Open-source cheminformatics and machine learning.
MorganFingerprints.h
Go to the documentation of this file.
1//
2//
3// Copyright (c) 2009-2010, Novartis Institutes for BioMedical Research Inc.
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10// * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12// * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following
14// disclaimer in the documentation and/or other materials provided
15// with the distribution.
16// * Neither the name of Novartis Institutes for BioMedical Research Inc.
17// nor the names of its contributors may be used to endorse or promote
18// products derived from this software without specific prior written
19// permission.
20//
21// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32//
33// Created by Greg Landrum, July 2008
34//
35//
36
37/*! \file MorganFingerprints.h
38
39*/
40#include <RDGeneral/export.h>
41#ifndef __RD_MORGANFPS_H__
42#define __RD_MORGANFPS_H__
43
44#include <vector>
45#include <map>
48#include <cstdint>
50
51namespace RDKit {
52class ROMol;
53namespace MorganFingerprints {
//! Maps a bit id to the list of (atomId, radius) pairs that set that bit.
/*!
  This is the type of the optional \c atomsSettingBits argument accepted by the
  fingerprinting functions declared below.
*/
typedef std::map<std::uint32_t,
                 std::vector<std::pair<std::uint32_t, std::uint32_t>>>
    BitInfoMap;

//! Version tag of the Morgan fingerprints declared in this header.
const std::string morganFingerprintVersion = "1.0.0";
59
60//! returns the Morgan fingerprint for a molecule
61/*!
62 These fingerprints are similar to the well-known ECFP or
63 FCFP fingerprints, depending on which invariants are used.
64
65 The algorithm used is described in the paper
66 Rogers, D. & Hahn, M. Extended-Connectivity Fingerprints. JCIM 50:742-54
67 (2010)
68 https://doi.org/10.1021/ci100050t
69
70 The original implementation was done using this paper:
71 D. Rogers, R.D. Brown, M. Hahn J. Biomol. Screen. 10:682-6 (2005)
72 and an unpublished technical report:
73 http://www.ics.uci.edu/~welling/teaching/ICS274Bspring06/David%20Rogers%20-%20ECFP%20Manuscript.doc
74
75 \param mol: the molecule to be fingerprinted
76 \param radius: the number of iterations to grow the fingerprint
77 \param invariants : optional pointer to a set of atom invariants to
78 be used. By default ECFP-type invariants are used
79 (calculated by getConnectivityInvariants())
80 \param fromAtoms : if this is provided, only the atoms in the vector will be
81 used as centers in the fingerprint
82 \param useChirality : if set, additional information will be added to the
83 fingerprint
84 when chiral atoms are discovered. This will cause
85 \verbatim C[C@H](F)Cl,
86 C[C@@H](F)Cl, and CC(F)Cl \endverbatim to generate
87 different fingerprints.
88 \param useBondTypes : if set, bond types will be included as part of the hash
89 for
90 calculating bits
91 \param useCounts : if set, counts of the features will be used
92 \param onlyNonzeroInvariants : if set, bits will only be set from atoms that
93 have a nonzero invariant.
94 \param atomsSettingBits : if nonzero, this will be used to return information
95 about the atoms that set each particular bit.
96 The keys are the map are bit ids, the values
97 are lists of (atomId, radius) pairs.
98 \param includeRedundantEnvironments : if set, the check for redundant atom
99 environments will not be done.
100
101 \return a pointer to the fingerprint. The client is
102 responsible for calling delete on this.
103
104*/
106 const ROMol &mol, unsigned int radius,
107 std::vector<boost::uint32_t> *invariants = nullptr,
108 const std::vector<boost::uint32_t> *fromAtoms = nullptr,
109 bool useChirality = false, bool useBondTypes = true, bool useCounts = true,
110 bool onlyNonzeroInvariants = false, BitInfoMap *atomsSettingBits = nullptr,
111 bool includeRedundantEnvironments = false);
112
113//! returns the Morgan fingerprint for a molecule
114/*!
115 These fingerprints are similar to the well-known ECFP or
116 FCFP fingerprints, depending on which invariants are used.
117
118 The algorithm used is described in the paper
119 Rogers, D. & Hahn, M. Extended-Connectivity Fingerprints. JCIM 50:742-54
120 (2010)
121 https://doi.org/10.1021/ci100050t
122
123 The original implementation was done using this paper:
124 D. Rogers, R.D. Brown, M. Hahn J. Biomol. Screen. 10:682-6 (2005)
125 and an unpublished technical report:
126 http://www.ics.uci.edu/~welling/teaching/ICS274Bspring06/David%20Rogers%20-%20ECFP%20Manuscript.doc
127
128 \param mol: the molecule to be fingerprinted
129 \param radius: the number of iterations to grow the fingerprint
130 \param invariants : optional pointer to a set of atom invariants to
131 be used. By default ECFP-type invariants are used
132 (calculated by getConnectivityInvariants())
133 \param fromAtoms : if this is provided, only the atoms in the vector will be
134 used as centers in the fingerprint
135 \param useChirality : if set, additional information will be added to the
136 fingerprint
137 when chiral atoms are discovered. This will cause
138 \verbatim C[C@H](F)Cl,
139 C[C@@H](F)Cl, and CC(F)Cl \endverbatim to generate
140 different fingerprints.
141 \param useBondTypes : if set, bond types will be included as part of the hash
142 for
143 calculating bits
144 \param onlyNonzeroInvariants : if set, bits will only be set from atoms that
145 have a nonzero invariant.
146 \param atomsSettingBits : if nonzero, this will be used to return information
147 about the atoms that set each particular bit.
148 The keys are the map are bit ids, the values
149 are lists of (atomId, radius) pairs.
150 \param includeRedundantEnvironments : if set, the check for redundant atom
151 environments will not be done.
152
153 \return a pointer to the fingerprint. The client is
154 responsible for calling delete on this.
155
156*/
158 const ROMol &mol, unsigned int radius, unsigned int nBits = 2048,
159 std::vector<boost::uint32_t> *invariants = nullptr,
160 const std::vector<boost::uint32_t> *fromAtoms = nullptr,
161 bool useChirality = false, bool useBondTypes = true,
162 bool onlyNonzeroInvariants = false, BitInfoMap *atomsSettingBits = nullptr,
163 bool includeRedundantEnvironments = false);
164
165//! returns the Morgan fingerprint for a molecule as a bit vector
166/*!
167 see documentation for getFingerprint() for theory/references
168
169 \param mol: the molecule to be fingerprinted
170 \param radius: the number of iterations to grow the fingerprint
171 \param nBits: the number of bits in the final fingerprint
172 \param invariants : optional pointer to a set of atom invariants to
173 be used. By default ECFP-type invariants are used
174 (calculated by getConnectivityInvariants())
175 \param fromAtoms : if this is provided, only the atoms in the vector will be
176 used as centers in the fingerprint
177 \param useChirality : if set, additional information will be added to the
178 fingerprint
179 when chiral atoms are discovered. This will cause
180 \verbatim C[C@H](F)Cl,
181 C[C@@H](F)Cl, and CC(F)Cl \endverbatim to generate
182 different fingerprints.
183 \param useBondTypes : if set, bond types will be included as part of the hash
184 for
185 calculating bits
186 \param onlyNonzeroInvariants : if set, bits will only be set from atoms that
187 have a nonzero invariant.
188 \param atomsSettingBits : if nonzero, this will be used to return information
189 about the atoms that set each particular bit.
190 The keys are the map are bit ids, the values
191 are lists of (atomId, radius) pairs.
192 \param includeRedundantEnvironments : if set, the check for redundant atom
193 environments will not be done.
194
195 \return a pointer to the fingerprint. The client is
196 responsible for calling delete on this.
197
198*/
200 const ROMol &mol, unsigned int radius, unsigned int nBits,
201 std::vector<std::uint32_t> *invariants = nullptr,
202 const std::vector<std::uint32_t> *fromAtoms = nullptr,
203 bool useChirality = false, bool useBondTypes = true,
204 bool onlyNonzeroInvariants = false, BitInfoMap *atomsSettingBits = nullptr,
205 bool includeRedundantEnvironments = false);
206
207} // end of namespace MorganFingerprints
208} // namespace RDKit
209
210#endif
a class for bit vectors that are densely occupied
a class for efficiently storing sparse vectors of ints
Definition: SparseIntVect.h:28
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:177
std::map< std::uint32_t, std::vector< std::pair< std::uint32_t, std::uint32_t > > > BitInfoMap
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::uint32_t > * getFingerprint(const ROMol &mol, unsigned int radius, std::vector< boost::uint32_t > *invariants=nullptr, const std::vector< boost::uint32_t > *fromAtoms=nullptr, bool useChirality=false, bool useBondTypes=true, bool useCounts=true, bool onlyNonzeroInvariants=false, BitInfoMap *atomsSettingBits=nullptr, bool includeRedundantEnvironments=false)
returns the Morgan fingerprint for a molecule
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * getFingerprintAsBitVect(const ROMol &mol, unsigned int radius, unsigned int nBits, std::vector< std::uint32_t > *invariants=nullptr, const std::vector< std::uint32_t > *fromAtoms=nullptr, bool useChirality=false, bool useBondTypes=true, bool onlyNonzeroInvariants=false, BitInfoMap *atomsSettingBits=nullptr, bool includeRedundantEnvironments=false)
returns the Morgan fingerprint for a molecule as a bit vector
const std::string morganFingerprintVersion
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::uint32_t > * getHashedFingerprint(const ROMol &mol, unsigned int radius, unsigned int nBits=2048, std::vector< boost::uint32_t > *invariants=nullptr, const std::vector< boost::uint32_t > *fromAtoms=nullptr, bool useChirality=false, bool useBondTypes=true, bool onlyNonzeroInvariants=false, BitInfoMap *atomsSettingBits=nullptr, bool includeRedundantEnvironments=false)
returns the Morgan fingerprint for a molecule
Std stuff.
Definition: Abbreviations.h:19