RDKit
Open-source cheminformatics and machine learning.
SmilesParse.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2021 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SMILESPARSE_H
12#define RD_SMILESPARSE_H
13
14#include <GraphMol/RWMol.h>
16#include <string>
17#include <exception>
18#include <map>
19
20namespace RDKit {
21
//! Options controlling how SmilesToMol() builds a molecule from a SMILES string
struct RDKIT_SMILESPARSE_EXPORT SmilesParserParams {
  int debugParse = 0;   /**< enable debugging in the SMILES parser*/
  bool sanitize = true; /**< sanitize the molecule after building it */
  std::map<std::string, std::string> *replacements =
      nullptr;               /**< allows SMILES "macros" */
  bool allowCXSMILES = true; /**< recognize and parse CXSMILES*/
  bool strictCXSMILES =
      true; /**< throw an exception if the CXSMILES parsing fails */
  bool parseName = true; /**< parse (and set) the molecule name as well */
  bool removeHs = true;  /**< remove Hs after constructing the molecule */
  bool useLegacyStereo =
      true; /**< \deprecated use the legacy stereochemistry perception code
               DEPRECATED, please use Chirality::setUseLegacyStereoPerception()
               instead. */
  bool skipCleanup =
      false; /**< skip the final cleanup stage (for internal use) */
};
40 const SmilesParserParams &params);
41
44
45//! Construct a molecule from a SMILES string
46/*!
47 \param smi the SMILES to convert
48 \param debugParse toggles verbose debugging information from the parser
49 \param sanitize toggles H removal and sanitization of the molecule
50 \param replacements a string->string map of replacement strings. See below
51 for more information about replacements.
52
53 \return a pointer to the new molecule; the caller is responsible for free'ing
54 this.
55
56 The optional replacements map can be used to do string substitution of
57 abbreviations
58 in the input SMILES. The set of substitutions is repeatedly looped through
59 until
60 the string no longer changes. It is the responsibility of the caller to make
61 sure
62 that substitutions results in legal and sensible SMILES.
63
64 Examples of substitutions:
65 \code
66 CC{Q}C with {"{Q}":"OCCO"} -> CCOCCOC
67 C{A}C{Q}C with {"{Q}":"OCCO", "{A}":"C1(CC1)"} -> CC1(CC1)COCCOC
68 C{A}C{Q}C with {"{Q}":"{X}CC{X}", "{A}":"C1CC1", "{X}":"N"} -> CC1CC1CNCCNC
69 \endcode
70
71 */
73 const std::string &smi, int debugParse = 0, bool sanitize = true,
74 std::map<std::string, std::string> *replacements = nullptr) {
75 SmilesParserParams params;
76 params.debugParse = debugParse;
77 params.replacements = replacements;
78 if (sanitize) {
79 params.sanitize = true;
80 params.removeHs = true;
81 } else {
82 params.sanitize = false;
83 params.removeHs = false;
84 }
85 return SmilesToMol(smi, params);
86};
87
//! Options controlling how SmartsToMol() builds a molecule from a SMARTS string
struct RDKIT_SMILESPARSE_EXPORT SmartsParserParams {
  int debugParse = 0; /**< enable debugging in the SMARTS parser*/
  std::map<std::string, std::string> *replacements =
      nullptr;               /**< allows SMARTS "macros" */
  bool allowCXSMILES = true; /**< recognize and parse CXSMILES extensions */
  bool strictCXSMILES =
      true; /**< throw an exception if the CXSMILES parsing fails */
  bool parseName = true; /**< parse (and set) the molecule name as well */
  bool mergeHs =
      true; /**< toggles merging H atoms in the SMARTS into neighboring atoms*/
  bool skipCleanup =
      false; /**< skip the final cleanup stage (for internal use) */
};
102 const SmartsParserParams &ps);
103
104//! Construct a molecule from a SMARTS string
105/*!
106 \param sma the SMARTS to convert
107 \param debugParse toggles verbose debugging information from the parser
108 \param mergeHs toggles merging H atoms in the SMARTS into neighboring
109 atoms
110 \param replacements a string->string map of replacement strings.
111 \see SmilesToMol for more information about replacements
112
113 \return a pointer to the new molecule; the caller is responsible for free'ing
114 this.
115 */
117 const std::string &sma, int debugParse = 0, bool mergeHs = false,
118 std::map<std::string, std::string> *replacements = nullptr) {
120 ps.debugParse = debugParse;
121 ps.mergeHs = mergeHs;
122 ps.replacements = replacements;
123 return SmartsToMol(sma, ps);
124};
125
128
//! Exception type carrying a message describing a SMILES/SMARTS parse failure
class RDKIT_SMILESPARSE_EXPORT SmilesParseException : public std::exception {
 public:
  //! construct from a C string; a null pointer yields an empty message
  //! (building a std::string from a null pointer is undefined behavior)
  SmilesParseException(const char *msg) : _msg(msg ? msg : "") {}
  //! construct from a std::string; taken by reference so the message is
  //! copied exactly once, into the stored member
  SmilesParseException(const std::string &msg) : _msg(msg) {}
  //! \return the stored message; the pointer stays valid for the lifetime of
  //! this exception object
  const char *what() const noexcept override { return _msg.c_str(); }
  ~SmilesParseException() noexcept override = default;

 private:
  std::string _msg;
};
139
140inline std::unique_ptr<RDKit::RWMol> operator"" _smiles(const char *text,
141 size_t len) {
142 std::string smi(text, len);
143 RWMol *ptr = nullptr;
144 try {
145 ptr = SmilesToMol(smi);
146 } catch (const RDKit::MolSanitizeException &) {
147 ptr = nullptr;
148 }
149 return std::unique_ptr<RWMol>(ptr);
150}
151inline std::unique_ptr<RDKit::RWMol> operator"" _smarts(const char *text,
152 size_t len) {
153 std::string smi(text, len);
154 // no need for exception handling here: SmartsToMol() doesn't do
155 // sanitization
156 RWMol *ptr = SmartsToMol(smi);
157 return std::unique_ptr<RWMol>(ptr);
158}
159
160} // namespace RDKit
161
162#endif
Defines the editable molecule class RWMol.
The class for representing atoms.
Definition: Atom.h:68
class for representing a bond
Definition: Bond.h:47
class for flagging sanitization errors
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
SmilesParseException(const std::string msg)
Definition: SmilesParse.h:132
SmilesParseException(const char *msg)
Definition: SmilesParse.h:131
const char * what() const noexcept override
Definition: SmilesParse.h:133
~SmilesParseException() noexcept override=default
#define RDKIT_SMILESPARSE_EXPORT
Definition: export.h:457
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
Std stuff.
Definition: Abbreviations.h:19
RDKIT_SMILESPARSE_EXPORT RWMol * SmartsToMol(const std::string &sma, const SmartsParserParams &ps)
RDKIT_SMILESPARSE_EXPORT Atom * SmartsToAtom(const std::string &sma)
RDKIT_SMILESPARSE_EXPORT Bond * SmilesToBond(const std::string &smi)
RDKIT_SMILESPARSE_EXPORT RWMol * SmilesToMol(const std::string &smi, const SmilesParserParams &params)
RDKIT_SMILESPARSE_EXPORT Bond * SmartsToBond(const std::string &sma)
RDKIT_SMILESPARSE_EXPORT Atom * SmilesToAtom(const std::string &smi)
std::map< std::string, std::string > * replacements
Definition: SmilesParse.h:90
std::map< std::string, std::string > * replacements
Definition: SmilesParse.h:25