RDKit
Open-source cheminformatics and machine learning.
Abbreviations.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2020 Greg Landrum and T5 Informatics GmbH
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_ABBREVIATIONS_H
12 #define RD_ABBREVIATIONS_H
13 #include <vector>
14 #include <string>
15 #include <memory>
16 
17 namespace RDKit {
18 class ROMol;
19 class RWMol;
20 
21 namespace Abbreviations {
23  std::string label;
24  std::string displayLabel;
25  std::string displayLabelW;
26  std::string smarts;
27  std::shared_ptr<ROMol> mol; //!< optional
28  std::vector<unsigned int> extraAttachAtoms; //!< optional
//! equality comparison between two abbreviation definitions.
//! Only the four string fields (label, displayLabel, displayLabelW and
//! smarts) take part in the comparison; the \c mol and \c extraAttachAtoms
//! members are not compared.
//! \param other the definition to compare against
//! \return true if all four string fields are equal
29  bool operator==(const AbbreviationDefinition& other) const {
30  return label == other.label && displayLabel == other.displayLabel &&
31  displayLabelW == other.displayLabelW && smarts == other.smarts;
32  }
//! inequality comparison; defined as the negation of operator==, so it
//! considers exactly the same fields.
//! \param other the definition to compare against
//! \return true if operator== reports the two definitions as different
33  bool operator!=(const AbbreviationDefinition& other) const {
34  return !(*this == other);
35  }
36 };
38  std::vector<std::pair<int, int>> match;
//! constructs a match from a list of index pairs and the abbreviation
//! definition it belongs to. Both arguments are copied into the
//! corresponding members (\c match and \c abbrev).
//! \param matchArg the pairs stored in \c match
//! \param abbrevArg the definition stored in \c abbrev
40  AbbreviationMatch(const std::vector<std::pair<int, int>>& matchArg,
41  const AbbreviationDefinition& abbrevArg)
42  : match(matchArg), abbrev(abbrevArg){};
//! equality comparison between two matches.
//! \param other the match to compare against
//! \return true if both the \c abbrev members and the \c match vectors
//!   compare equal
44  bool operator==(const AbbreviationMatch& other) const {
45  return abbrev == other.abbrev && match == other.match;
46  }
//! inequality comparison; defined as the negation of operator==.
//! \param other the match to compare against
//! \return true if operator== reports the two matches as different
47  bool operator!=(const AbbreviationMatch& other) const {
48  return !(*this == other);
49  }
50 };
51 namespace common_properties {
52 RDKIT_ABBREVIATIONS_EXPORT extern const std::string numDummies;
53 }
54 namespace Utils {
55 //! returns the default set of abbreviation definitions
56 RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
58 //! returns the default set of linker definitions
59 RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
61 
62 //! parses a string describing abbreviation matches and returns the result
63 /*!
64 
65 \param text the data to be parsed, see below for the format
66 \param removeExtraDummies controls whether or not dummy atoms beyond atom 0 are
67  removed. Set this to true to create abbreviations for linkers
68 \param allowConnectionToDummies allows abbreviations to directly connect to
69  abbreviations. set this to true for linkers
70 
71 Format of the text data:
72  A series of lines, each of which contains:
73 
74  label SMARTS displayLabel displayLabelW
75 
76  the "displayLabel" and "displayLabelW" fields are optional.
77  where label is the label used for the abbreviation,
78  SMARTS is the SMARTS definition of the abbreviation.
79  displayLabel is used in drawings to render the abbreviations.
80  displayLabelW is the display label if a bond comes in from the right
81 
82  Use dummies to indicate attachment points. The assumption is that the first
83  atom is a dummy (one will be added if this is not true) and that the second
84  atom is the surrogate for the rest of the group.
85 
86 */
87 RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
88 parseAbbreviations(const std::string& text, bool removeExtraDummies = false,
89  bool allowConnectionToDummies = false);
90 //! \brief equivalent to calling \c parseAbbreviations(text,true,true)
//! i.e. with both \c removeExtraDummies and \c allowConnectionToDummies
//! set to true.
//! \param text the data to be parsed; forwarded unchanged to
//!   \c parseAbbreviations
//! \return the result of \c parseAbbreviations for those arguments
91 inline std::vector<AbbreviationDefinition> parseLinkers(
92  const std::string& text) {
93  return parseAbbreviations(text, true, true);
94 };
95 } // namespace Utils
96 
97 //! returns all matches for the abbreviations across the molecule
98 /*!
99 
100  \param abbrevs the abbreviations to look for. This list is used in order.
101  \param maxCoverage any abbreviation that covers more than this fraction
102  of the molecule's atoms (not counting dummies) will not be returned.
103 */
104 RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationMatch>
106  const ROMol& mol, const std::vector<AbbreviationDefinition>& abbrevs,
107  double maxCoverage = 0.4);
108 //! applies the abbreviation matches to a molecule, modifying it in place.
109 //! the modified molecule is not sanitized
111  RWMol& mol, const std::vector<AbbreviationMatch>& matches);
112 //! creates "SUP" SubstanceGroups on the molecule describing the abbreviation
114  RWMol& mol, const std::vector<AbbreviationMatch>& matches);
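115 //! convenience function for finding and applying abbreviations
116 //! the modified molecule is not sanitized
// NOTE(review): the declaration that follows defaults \c sanitize to true,
// which appears to contradict the line above; confirm against the
// implementation and update this comment accordingly.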
118  RWMol& mol, const std::vector<AbbreviationDefinition>& abbrevs,
119  double maxCoverage = 0.4, bool sanitize = true);
120 //! convenience function for finding and labeling abbreviations as SUP
121 //! SubstanceGroups
123  RWMol& mol, const std::vector<AbbreviationDefinition>& abbrevs,
124  double maxCoverage = 0.4);
125 //! collapses abbreviation (i.e. "SUP") substance groups
126 //! the modified molecule is not sanitized
128 
129 } // namespace Abbreviations
130 } // namespace RDKit
131 #endif
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
#define RDKIT_ABBREVIATIONS_EXPORT
Definition: export.h:21
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationDefinition > getDefaultAbbreviations()
returns the default set of abbreviation definitions
std::vector< AbbreviationDefinition > parseLinkers(const std::string &text)
equivalent to calling parseAbbreviations(text,true,true)
Definition: Abbreviations.h:91
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationDefinition > parseAbbreviations(const std::string &text, bool removeExtraDummies=false, bool allowConnectionToDummies=false)
parses a string describing abbreviation matches and returns the result
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationDefinition > getDefaultLinkers()
returns the default set of linker definitions
RDKIT_ABBREVIATIONS_EXPORT const std::string numDummies
RDKIT_ABBREVIATIONS_EXPORT void applyMatches(RWMol &mol, const std::vector< AbbreviationMatch > &matches)
RDKIT_ABBREVIATIONS_EXPORT void condenseAbbreviationSubstanceGroups(RWMol &mol)
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationMatch > findApplicableAbbreviationMatches(const ROMol &mol, const std::vector< AbbreviationDefinition > &abbrevs, double maxCoverage=0.4)
returns all matches for the abbreviations across the molecule
RDKIT_ABBREVIATIONS_EXPORT void labelMolAbbreviations(RWMol &mol, const std::vector< AbbreviationDefinition > &abbrevs, double maxCoverage=0.4)
RDKIT_ABBREVIATIONS_EXPORT void condenseMolAbbreviations(RWMol &mol, const std::vector< AbbreviationDefinition > &abbrevs, double maxCoverage=0.4, bool sanitize=true)
RDKIT_ABBREVIATIONS_EXPORT void labelMatches(RWMol &mol, const std::vector< AbbreviationMatch > &matches)
creates "SUP" SubstanceGroups on the molecule describing the abbreviation
Std stuff.
Definition: Abbreviations.h:17
std::vector< unsigned int > extraAttachAtoms
optional
Definition: Abbreviations.h:28
bool operator==(const AbbreviationDefinition &other) const
optional
Definition: Abbreviations.h:29
bool operator!=(const AbbreviationDefinition &other) const
Definition: Abbreviations.h:33
AbbreviationMatch(const std::vector< std::pair< int, int >> &matchArg, const AbbreviationDefinition &abbrevArg)
Definition: Abbreviations.h:40
bool operator!=(const AbbreviationMatch &other) const
Definition: Abbreviations.h:47
std::vector< std::pair< int, int > > match
Definition: Abbreviations.h:38
bool operator==(const AbbreviationMatch &other) const
Definition: Abbreviations.h:44