RDKit
Open-source cheminformatics and machine learning.
MolWriters.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2017 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef _RD_MOLWRITERS_H_
13 #define _RD_MOLWRITERS_H_
14 
15 #include <RDGeneral/types.h>
16 
17 #include <iostream>
18 #include <memory>
19 #include <string>
20 
21 #include <boost/noncopyable.hpp>
22 
23 #ifdef RDK_BUILD_MAEPARSER_SUPPORT
24 namespace schrodinger {
25 namespace mae {
26 class Writer;
27 } // namespace mae
28 } // namespace schrodinger
29 #endif // RDK_BUILD_MAEPARSER_SUPPORT
30 
31 #include <GraphMol/ROMol.h>
32 
33 namespace RDKit {
34 
//! Default value used for the \c confId argument of the write() and
//! getText() functions declared in this file.
// NOTE(review): this is a non-const, namespace-scope static defined in a
// header, so it has internal linkage and every translation unit that
// includes this file gets its own independent, mutable copy.
35 static int defaultConfId = -1;
36 
37 class RDKIT_FILEPARSERS_EXPORT MolWriter : private boost::noncopyable {
38  public:
39  virtual ~MolWriter() {}
40  virtual void write(const ROMol &mol, int confId = defaultConfId) = 0;
41  virtual void flush() = 0;
42  virtual void close() = 0;
43  virtual void setProps(const STR_VECT &propNames) = 0;
44  virtual unsigned int numMols() const = 0;
45 };
46 
47 //! The SmilesWriter is for writing molecules and properties to
48 //! delimited text files.
50  /******************************************************************************
51  * A Smiles Table writer - this is how it is used
52  * - create a SmilesWriter with an output file name (or an ostream), a
53  * delimiter,
54  * and a list of properties that need to be written out
55  * - then a call is made to the write function for each molecule that needs
56  * to
57  * be written out
58  ******************************************************************************/
59  public:
60  /*!
61  \param fileName : filename to write to ("-" to write to stdout)
62  \param delimiter : delimiter to use in the text file
63  \param nameHeader : used to label the name column in the output. If this
64  is provided as the empty string, no names will be
65  written.
66  \param includeHeader : toggles inclusion of a header line in the output
67  \param isomericSmiles : toggles generation of isomeric SMILES
68  \param kekuleSmiles : toggles the generation of kekule SMILES
69 
70  */
71  SmilesWriter(const std::string &fileName, const std::string &delimiter = " ",
72  const std::string &nameHeader = "Name",
73  bool includeHeader = true, bool isomericSmiles = true,
74  bool kekuleSmiles = false);
75  //! \overload
76  SmilesWriter(std::ostream *outStream, std::string delimiter = " ",
77  std::string nameHeader = "Name", bool includeHeader = true,
78  bool takeOwnership = false, bool isomericSmiles = true,
79  bool kekuleSmiles = false);
80 
81  ~SmilesWriter() override;
82 
83 //! \brief set a vector of property names that need to be
84 //! written out for each molecule
85  void setProps(const STR_VECT &propNames) override;
86 
87  //! \brief write a new molecule to the file
88  void write(const ROMol &mol, int confId = defaultConfId) override;
89 
90  //! \brief flush the ostream
91  void flush() override {
92  PRECONDITION(dp_ostream, "no output stream");
93  try {
94  dp_ostream->flush();
95  } catch (...) {
96  try {
97  if (dp_ostream->good()) {
98  dp_ostream->setstate(std::ios::badbit);
99  }
100  } catch (const std::runtime_error &) {
101  }
102  }
103  }
104 
105  //! \brief close our stream (the writer cannot be used again)
106  void close() override {
107  if (dp_ostream) {
108  flush();
109  }
110  if (df_owner) {
111  delete dp_ostream;
112  df_owner = false;
113  }
114  dp_ostream = nullptr;
115  }
116 
117  //! \brief get the number of molecules written so far
118  unsigned int numMols() const override { return d_molid; }
119 
120  private:
121  // local initialization
122  void init(const std::string &delimiter, const std::string &nameHeader,
123  bool includeHeader, bool isomericSmiles, bool kekuleSmiles);
124 
125  // dumps a header line to the output stream
126  void dumpHeader() const;
127 
128  std::ostream *dp_ostream;
129  bool df_owner;
130  bool df_includeHeader; // whether or not to include a title line
131  unsigned int d_molid; // the number of the molecules we wrote so far
132  std::string d_delim; // delimiter string between various records
133  std::string d_nameHeader; // header for the name column in the output file
134  STR_VECT d_props; // list of property name that need to be written out
135  bool df_isomericSmiles; // whether or not to do isomeric smiles
136  bool df_kekuleSmiles; // whether or not to do kekule smiles
137 };
138 
139 //! The SDWriter is for writing molecules and properties to
140 //! SD files
142  /**************************************************************************************
143  * An SD file (or stream) writer - this is how it is used
144  * - create an SDWriter with an output file name (or an ostream),
145  * and a list of properties that need to be written out
146  * - then a call is made to the write function for each molecule that needs
147  * to be written out
148  **********************************************************************************************/
149  public:
150  /*!
151  \param fileName : filename to write to ("-" to write to stdout)
152  */
153  SDWriter(const std::string &fileName);
154  SDWriter(std::ostream *outStream, bool takeOwnership = false);
155 
156  ~SDWriter() override;
157 
158 //! \brief set a vector of property names that need to be
159 //! written out for each molecule
160  void setProps(const STR_VECT &propNames) override;
161 
162  //! \brief return the text that would be written to the file
163  static std::string getText(const ROMol &mol, int confId = defaultConfId,
164  bool kekulize = true, bool force_V3000 = false,
165  int molid = -1, STR_VECT *propNames = nullptr);
166 
167  //! \brief write a new molecule to the file
168  void write(const ROMol &mol, int confId = defaultConfId) override;
169 
170  //! \brief flush the ostream
171  void flush() override {
172  PRECONDITION(dp_ostream, "no output stream");
173  try {
174  dp_ostream->flush();
175  } catch (...) {
176  try {
177  if (dp_ostream->good()) {
178  dp_ostream->setstate(std::ios::badbit);
179  }
180  } catch (const std::runtime_error &) {
181  }
182  }
183  }
184 
185  //! \brief close our stream (the writer cannot be used again)
186  void close() override {
187  if (dp_ostream) {
188  flush();
189  }
190  if (df_owner) {
191  delete dp_ostream;
192  df_owner = false;
193  }
194  dp_ostream = nullptr;
195  }
196 
197  //! \brief get the number of molecules written so far
198  unsigned int numMols() const override { return d_molid; }
199 
200  void setForceV3000(bool val) { df_forceV3000 = val; }
201  bool getForceV3000() const { return df_forceV3000; }
202 
203  void setKekulize(bool val) { df_kekulize = val; }
204  bool getKekulize() const { return df_kekulize; }
205 
206  private:
207  void writeProperty(const ROMol &mol, const std::string &name);
208 
209  std::ostream *dp_ostream;
210  bool df_owner;
211  unsigned int d_molid; // the number of the molecules we wrote so far
212  STR_VECT d_props; // list of property name that need to be written out
213  bool df_forceV3000; // force writing the mol blocks as V3000
214  bool df_kekulize; // toggle kekulization of molecules on writing
215 };
216 
217 //! The TDTWriter is for writing molecules and properties to
218 //! TDT files
220  /**************************************************************************************
221  * A TDT file (or stream) writer - this is how it is used
222  * - create a TDTWriter with an output file name (or an ostream),
223  * and a list of properties that need to be written out
224  * - then a call is made to the write function for each molecule that needs
225  * to be written out
226  **********************************************************************************************/
227  public:
228  /*!
229  \param fileName : filename to write to ("-" to write to stdout)
230  */
231  TDTWriter(const std::string &fileName);
232  TDTWriter(std::ostream *outStream, bool takeOwnership = false);
233 
234  ~TDTWriter() override;
235 
236 //! \brief set a vector of property names that need to be
237 //! written out for each molecule
238  void setProps(const STR_VECT &propNames) override;
239 
240  //! \brief write a new molecule to the file
241  void write(const ROMol &mol, int confId = defaultConfId) override;
242 
243  //! \brief flush the ostream
244  void flush() override {
245  PRECONDITION(dp_ostream, "no output stream");
246  try {
247  dp_ostream->flush();
248  } catch (...) {
249  try {
250  if (dp_ostream->good()) {
251  dp_ostream->setstate(std::ios::badbit);
252  }
253  } catch (const std::runtime_error &) {
254  }
255  }
256  }
257 
258  //! \brief close our stream (the writer cannot be used again)
259  void close() override {
260  if (dp_ostream) {
261  // if we've written any mols, finish with a "|" line
262  if (d_molid > 0) {
263  *dp_ostream << "|\n";
264  }
265  flush();
266  }
267  if (df_owner) {
268  delete dp_ostream;
269  df_owner = false;
270  }
271  dp_ostream = nullptr;
272  }
273 
274  //! \brief get the number of molecules written so far
275  unsigned int numMols() const override { return d_molid; }
276 
277  void setWrite2D(bool state = true) { df_write2D = state; }
278  bool getWrite2D() const { return df_write2D; }
279 
280  void setWriteNames(bool state = true) { df_writeNames = state; }
281  bool getWriteNames() const { return df_writeNames; }
282 
283  void setNumDigits(unsigned int numDigits) { d_numDigits = numDigits; }
284  unsigned int getNumDigits() const { return d_numDigits; }
285 
286  private:
287  void writeProperty(const ROMol &mol, const std::string &name);
288 
289  std::ostream *dp_ostream;
290  bool df_owner;
291  unsigned int d_molid; // the number of molecules we wrote so far
292  STR_VECT d_props; // list of property name that need to be written out
293  bool df_write2D; // write 2D coordinates instead of 3D
294  bool df_writeNames; // write a name record for each molecule
295  unsigned int
296  d_numDigits; // number of digits to use in our output of coordinates;
297 };
298 
299 //! The PDBWriter is for writing molecules to Brookhaven Protein
300 //! DataBank format files.
302  public:
303  PDBWriter(const std::string &fileName, unsigned int flavor = 0);
304  PDBWriter(std::ostream *outStream, bool takeOwnership = false,
305  unsigned int flavor = 0);
306  ~PDBWriter() override;
307 
308  //! \brief write a new molecule to the file
309  void write(const ROMol &mol, int confId = defaultConfId) override;
310 
311  void setProps(const STR_VECT &) override {}
312 
313  //! \brief flush the ostream
314  void flush() override {
315  PRECONDITION(dp_ostream, "no output stream");
316  try {
317  dp_ostream->flush();
318  } catch (...) {
319  try {
320  if (dp_ostream->good()) {
321  dp_ostream->setstate(std::ios::badbit);
322  }
323  } catch (const std::runtime_error &) {
324  }
325  }
326  }
327 
328  //! \brief close our stream (the writer cannot be used again)
329  void close() override {
330  if (dp_ostream) {
331  flush();
332  }
333  if (df_owner) {
334  delete dp_ostream;
335  df_owner = false;
336  }
337  dp_ostream = nullptr;
338  }
339 
340  //! \brief get the number of molecules written so far
341  unsigned int numMols() const override { return d_count; }
342 
343  private:
344  std::ostream *dp_ostream;
345  unsigned int d_flavor;
346  unsigned int d_count;
347  bool df_owner;
348 };
349 
350 #ifdef RDK_BUILD_MAEPARSER_SUPPORT
351 
352 class RDKIT_FILEPARSERS_EXPORT MaeWriter : public MolWriter {
353  /**************************************************************************************
354  * A highly experimental Maestro file (or stream) writer. Many features are
355  * not supported yet, e.g. chirality and bond stereo, stereo groups, substance
356  * groups, isotopes or dummy atoms. Note that except for stereochemistry
357  * labels these aren't supported by the MaeMolSupplier either.
358  *
359  * Usage:
360  * - create a MaeWriter with an output file name (or a ostream),
361  * and a list of mol/atom/bond properties that need to be written out.
362  * If no property names are specified, all properties will be exported.
363  * Properties that are specified, but are not present will be ignored.
364  * - then, a call is made to the write function for each molecule
365  * that needs to be written out.
366  *
367  * Notes:
368  * - kekulization is mandatory, as the Maestro format does not
369  * have the concept of an aromatic bond.
370  * - Ownership of the output stream is mandatory, since it needs
371  * to be managed though a shared_ptr, as this is what maeparser
372  * writer takes.
373  ***************************************************************************************/
374  public:
375  /*!
376  \param fileName : filename to write to (stdout is *not* supported)
377  */
378  MaeWriter(const std::string &fileName);
379 
380  /*!
381  \note Note that this takes ownership of the output stream.
382  */
383  MaeWriter(std::ostream *outStream);
384 
385  MaeWriter(std::shared_ptr<std::ostream> outStream);
386 
387  ~MaeWriter() override;
388 
389  //! \brief set a vector of property names that are need to be
390  //! written out for each molecule
391  void setProps(const STR_VECT &propNames) override;
392 
393  //! \brief return the text that would be written to the file
394  static std::string getText(const ROMol &mol, int confId = defaultConfId,
395  const STR_VECT &propNames = STR_VECT());
396 
397  //! \brief write a new molecule to the file.
398  void write(const ROMol &mol, int confId = defaultConfId);
399 
400  //! \brief flush the ostream
401  void flush() override;
402  //! \brief close our stream (the writer cannot be used again)
403  void close() override;
404 
405  //! \brief get the number of molecules written so far
406  unsigned int numMols() const override { return d_molid; }
407 
408  protected:
409  MaeWriter() = default; // used in the Python wrapper
410 
411  std::shared_ptr<std::ostream> dp_ostream;
412 
413  private:
414  void open();
415 
416  std::unique_ptr<schrodinger::mae::Writer> dp_writer;
417  unsigned d_molid = 0; // the number of the molecules we wrote so far
418  STR_VECT d_props; // list of property name that need to be written out
419 };
420 
421 #endif // RDK_BUILD_MAEPARSER_SUPPORT
422 
423 } // namespace RDKit
424 
425 #endif
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
virtual void flush()=0
virtual void write(const ROMol &mol, int confId=defaultConfId)=0
virtual ~MolWriter()
Definition: MolWriters.h:39
virtual void close()=0
virtual void setProps(const STR_VECT &propNames)=0
virtual unsigned int numMols() const =0
PDBWriter(const std::string &fileName, unsigned int flavor=0)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
void flush() override
flush the ostream
Definition: MolWriters.h:314
PDBWriter(std::ostream *outStream, bool takeOwnership=false, unsigned int flavor=0)
void setProps(const STR_VECT &) override
Definition: MolWriters.h:311
~PDBWriter() override
unsigned int numMols() const override
get the number of molecules written so far
Definition: MolWriters.h:341
void close() override
close our stream (the writer cannot be used again)
Definition: MolWriters.h:329
~SDWriter() override
bool getForceV3000() const
Definition: MolWriters.h:201
unsigned int numMols() const override
get the number of molecules written so far
Definition: MolWriters.h:198
SDWriter(std::ostream *outStream, bool takeOwnership=false)
bool getKekulize() const
Definition: MolWriters.h:204
void flush() override
flush the ostream
Definition: MolWriters.h:171
void setProps(const STR_VECT &propNames) override
set a vector of property names that are need to be written out for each molecule
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
static std::string getText(const ROMol &mol, int confId=defaultConfId, bool kekulize=true, bool force_V3000=false, int molid=-1, STR_VECT *propNames=nullptr)
return the text that would be written to the file
void close() override
close our stream (the writer cannot be used again)
Definition: MolWriters.h:186
void setForceV3000(bool val)
Definition: MolWriters.h:200
SDWriter(const std::string &fileName)
void setKekulize(bool val)
Definition: MolWriters.h:203
unsigned int numMols() const override
get the number of molecules written so far
Definition: MolWriters.h:118
SmilesWriter(const std::string &fileName, const std::string &delimiter=" ", const std::string &nameHeader="Name", bool includeHeader=true, bool isomericSmiles=true, bool kekuleSmiles=false)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
~SmilesWriter() override
SmilesWriter(std::ostream *outStream, std::string delimiter=" ", std::string nameHeader="Name", bool includeHeader=true, bool takeOwnership=false, bool isomericSmiles=true, bool kekuleSmiles=false)
This is an overloaded member function, provided for convenience. It differs from the above function o...
void setProps(const STR_VECT &propNames) override
set a vector of property names that are need to be written out for each molecule
void close() override
close our stream (the writer cannot be used again)
Definition: MolWriters.h:106
void flush() override
flush the ostream
Definition: MolWriters.h:91
~TDTWriter() override
bool getWrite2D() const
Definition: MolWriters.h:278
void setNumDigits(unsigned int numDigits)
Definition: MolWriters.h:283
void setWrite2D(bool state=true)
Definition: MolWriters.h:277
void setProps(const STR_VECT &propNames) override
set a vector of property names that are need to be written out for each molecule
unsigned int numMols() const override
get the number of molecules written so far
Definition: MolWriters.h:275
unsigned int getNumDigits() const
Definition: MolWriters.h:284
TDTWriter(std::ostream *outStream, bool takeOwnership=false)
void close() override
close our stream (the writer cannot be used again)
Definition: MolWriters.h:259
TDTWriter(const std::string &fileName)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
void setWriteNames(bool state=true)
Definition: MolWriters.h:280
void flush() override
flush the ostream
Definition: MolWriters.h:244
bool getWriteNames() const
Definition: MolWriters.h:281
#define RDKIT_FILEPARSERS_EXPORT
Definition: export.h:161
Std stuff.
Definition: Abbreviations.h:19
std::vector< std::string > STR_VECT
Definition: Dict.h:29
static int defaultConfId
Definition: MolWriters.h:35