RDKit
Open-source cheminformatics and machine learning.
new_canon.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Greg Landrum
3 // Adapted from pseudo-code from Roger Sayle
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 
12 #include <RDGeneral/export.h>
13 #include <RDGeneral/hanoiSort.h>
14 #include <GraphMol/ROMol.h>
15 #include <GraphMol/RingInfo.h>
16 #include <GraphMol/StereoGroup.h>
18 #include <cstdint>
19 #include <boost/dynamic_bitset.hpp>
21 #include <cstring>
22 #include <iostream>
23 #include <cassert>
24 #include <cstring>
25 #include <vector>
26 
27 // #define VERBOSE_CANON 1
28 
29 namespace RDKit {
30 namespace Canon {
31 struct canon_atom;
32 
34  Bond::BondType bondType{Bond::BondType::UNSPECIFIED};
35  unsigned int bondStereo{
36  static_cast<unsigned int>(Bond::BondStereo::STEREONONE)};
37  unsigned int nbrSymClass{0};
38  unsigned int nbrIdx{0};
39  Bond::BondStereo stype{Bond::BondStereo::STEREONONE};
40  const canon_atom *controllingAtoms[4]{nullptr, nullptr, nullptr, nullptr};
41  const std::string *p_symbol{
42  nullptr}; // if provided, this is used to order bonds
43  unsigned int bondIdx{0};
44 
46  bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni,
47  unsigned int nsc, unsigned int bidx)
48  : bondType(bt),
49  bondStereo(static_cast<unsigned int>(bs)),
50  nbrSymClass(nsc),
51  nbrIdx(ni),
52  bondIdx(bidx) {}
53  bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
54  unsigned int nsc, unsigned int bidx)
55  : bondType(bt),
56  bondStereo(bs),
57  nbrSymClass(nsc),
58  nbrIdx(ni),
59  bondIdx(bidx) {}
60 
61  int compareStereo(const bondholder &o) const;
62 
63  bool operator<(const bondholder &o) const { return compare(*this, o) < 0; }
64  static bool greater(const bondholder &lhs, const bondholder &rhs) {
65  return compare(lhs, rhs) > 0;
66  }
67 
68  static int compare(const bondholder &x, const bondholder &y,
69  unsigned int div = 1) {
70  if (x.p_symbol && y.p_symbol) {
71  if ((*x.p_symbol) < (*y.p_symbol)) {
72  return -1;
73  } else if ((*x.p_symbol) > (*y.p_symbol)) {
74  return 1;
75  }
76  }
77  if (x.bondType < y.bondType) {
78  return -1;
79  } else if (x.bondType > y.bondType) {
80  return 1;
81  }
82  if (x.bondStereo < y.bondStereo) {
83  return -1;
84  } else if (x.bondStereo > y.bondStereo) {
85  return 1;
86  }
87  auto scdiv = x.nbrSymClass / div - y.nbrSymClass / div;
88  if (scdiv) {
89  return scdiv;
90  }
91  if (x.bondStereo && y.bondStereo) {
92  auto cs = x.compareStereo(y);
93  if (cs) {
94  return cs;
95  }
96  }
97  return 0;
98  }
99 };
101  const Atom *atom{nullptr};
102  int index{-1};
103  unsigned int degree{0};
104  unsigned int totalNumHs{0};
105  bool hasRingNbr{false};
106  bool isRingStereoAtom{false};
107  unsigned int whichStereoGroup{0};
109  int *nbrIds{nullptr};
110  const std::string *p_symbol{
111  nullptr}; // if provided, this is used to order atoms
112  std::vector<int> neighborNum;
113  std::vector<int> revistedNeighbors;
114  std::vector<bondholder> bonds;
115 
117 
118  ~canon_atom() { free(nbrIds); }
119 };
120 
122  canon_atom *atoms, std::vector<bondholder> &nbrs);
123 
125  canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
126  std::vector<std::pair<unsigned int, unsigned int>> &result);
127 
128 /*
129  * Different types of atom compare functions:
130  *
131  * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
132  *dependent chirality
133  * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
134  *highly symmetrical graphs/molecules
135  * - AtomCompareFunctor: Basic atom compare function which also allows to
136  *include neighbors within the ranking
137  */
138 
140  public:
141  Canon::canon_atom *dp_atoms{nullptr};
142  const ROMol *dp_mol{nullptr};
143  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
144  *dp_bondsInPlay{nullptr};
145 
148  Canon::canon_atom *atoms, const ROMol &m,
149  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
150  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
151  : dp_atoms(atoms),
152  dp_mol(&m),
153  dp_atomsInPlay(atomsInPlay),
154  dp_bondsInPlay(bondsInPlay) {}
155  int operator()(int i, int j) const {
156  PRECONDITION(dp_atoms, "no atoms");
157  PRECONDITION(dp_mol, "no molecule");
158  PRECONDITION(i != j, "bad call");
159  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
160  return 0;
161  }
162 
163  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
164  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
165  }
166  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
167  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
168  }
169  for (unsigned int ii = 0;
170  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
171  int cmp =
172  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
173  if (cmp) {
174  return cmp;
175  }
176  }
177 
178  std::vector<std::pair<unsigned int, unsigned int>> swapsi;
179  std::vector<std::pair<unsigned int, unsigned int>> swapsj;
180  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
181  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
182  }
183  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
184  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
185  }
186  for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
187  int cmp = swapsi[ii].second - swapsj[ii].second;
188  if (cmp) {
189  return cmp;
190  }
191  }
192  return 0;
193  }
194 };
195 
197  public:
198  Canon::canon_atom *dp_atoms{nullptr};
199  const ROMol *dp_mol{nullptr};
200  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
201  *dp_bondsInPlay{nullptr};
202 
205  Canon::canon_atom *atoms, const ROMol &m,
206  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
207  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
208  : dp_atoms(atoms),
209  dp_mol(&m),
210  dp_atomsInPlay(atomsInPlay),
211  dp_bondsInPlay(bondsInPlay) {}
212  int operator()(int i, int j) const {
213  PRECONDITION(dp_atoms, "no atoms");
214  PRECONDITION(dp_mol, "no molecule");
215  PRECONDITION(i != j, "bad call");
216  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
217  return 0;
218  }
219 
220  if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
221  return -1;
222  } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
223  return 1;
224  }
225 
226  if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
227  return -1;
228  } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
229  return 1;
230  }
231 
232  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
233  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
234  }
235  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
236  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
237  }
238  for (unsigned int ii = 0;
239  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
240  int cmp =
241  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
242  if (cmp) {
243  return cmp;
244  }
245  }
246 
247  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
248  return -1;
249  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
250  return 1;
251  }
252  return 0;
253  }
254 };
255 
256 namespace {
257 unsigned int getChiralRank(const ROMol *dp_mol, canon_atom *dp_atoms,
258  unsigned int i) {
259  unsigned int res = 0;
260  std::vector<unsigned int> perm;
261  perm.reserve(dp_atoms[i].atom->getDegree());
262  for (const auto nbr : dp_mol->atomNeighbors(dp_atoms[i].atom)) {
263  auto rnk = dp_atoms[nbr->getIdx()].index;
264  // make sure we don't have duplicate ranks
265  if (std::find(perm.begin(), perm.end(), rnk) != perm.end()) {
266  break;
267  } else {
268  perm.push_back(rnk);
269  }
270  }
271  if (perm.size() == dp_atoms[i].atom->getDegree()) {
272  auto ctag = dp_atoms[i].atom->getChiralTag();
273  if (ctag == Atom::ChiralType::CHI_TETRAHEDRAL_CW ||
274  ctag == Atom::ChiralType::CHI_TETRAHEDRAL_CCW) {
275  auto sortedPerm = perm;
276  std::sort(sortedPerm.begin(), sortedPerm.end());
277  auto nswaps = countSwapsToInterconvert(perm, sortedPerm);
278  res = ctag == Atom::ChiralType::CHI_TETRAHEDRAL_CW ? 2 : 1;
279  if (nswaps % 2) {
280  res = res == 2 ? 1 : 2;
281  }
282  }
283  }
284  return res;
285 }
286 } // namespace
288  unsigned int getAtomRingNbrCode(unsigned int i) const {
289  if (!dp_atoms[i].hasRingNbr) {
290  return 0;
291  }
292 
293  int *nbrs = dp_atoms[i].nbrIds;
294  unsigned int code = 0;
295  for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
296  if (dp_atoms[nbrs[j]].isRingStereoAtom) {
297  code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
298  }
299  }
300  return code;
301  }
302 
303  int basecomp(int i, int j) const {
304  unsigned int ivi, ivj;
305 
306  // always start with the current class:
307  ivi = dp_atoms[i].index;
308  ivj = dp_atoms[j].index;
309  if (ivi < ivj) {
310  return -1;
311  } else if (ivi > ivj) {
312  return 1;
313  }
314  // use the atom-mapping numbers if they were assigned
315  /* boost::any_cast FILED here:
316  std::string molAtomMapNumber_i="";
317  std::string molAtomMapNumber_j="";
318  */
319  int molAtomMapNumber_i = 0;
320  int molAtomMapNumber_j = 0;
321  dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
322  molAtomMapNumber_i);
323  dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
324  molAtomMapNumber_j);
325  if (molAtomMapNumber_i < molAtomMapNumber_j) {
326  return -1;
327  } else if (molAtomMapNumber_i > molAtomMapNumber_j) {
328  return 1;
329  }
330  // start by comparing degree
331  ivi = dp_atoms[i].degree;
332  ivj = dp_atoms[j].degree;
333  if (ivi < ivj) {
334  return -1;
335  } else if (ivi > ivj) {
336  return 1;
337  }
338  if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
339  if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol)) {
340  return -1;
341  } else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol)) {
342  return 1;
343  } else {
344  return 0;
345  }
346  }
347 
348  // move onto atomic number
349  ivi = dp_atoms[i].atom->getAtomicNum();
350  ivj = dp_atoms[j].atom->getAtomicNum();
351  if (ivi < ivj) {
352  return -1;
353  } else if (ivi > ivj) {
354  return 1;
355  }
356  // isotopes if we're using them
357  if (df_useIsotopes) {
358  ivi = dp_atoms[i].atom->getIsotope();
359  ivj = dp_atoms[j].atom->getIsotope();
360  if (ivi < ivj) {
361  return -1;
362  } else if (ivi > ivj) {
363  return 1;
364  }
365  }
366 
367  // nHs
368  ivi = dp_atoms[i].totalNumHs;
369  ivj = dp_atoms[j].totalNumHs;
370  if (ivi < ivj) {
371  return -1;
372  } else if (ivi > ivj) {
373  return 1;
374  }
375  // charge
376  ivi = dp_atoms[i].atom->getFormalCharge();
377  ivj = dp_atoms[j].atom->getFormalCharge();
378  if (ivi < ivj) {
379  return -1;
380  } else if (ivi > ivj) {
381  return 1;
382  }
383  // chirality if we're using it
384  if (df_useChirality) {
385  // look at enhanced stereo
386  ivi = dp_atoms[i].whichStereoGroup; // can't use the index itself, but if
387  // it's set then we're in an SG
388  ivj = dp_atoms[j].whichStereoGroup;
389  if (ivi || ivj) {
390  if (ivi && !ivj) {
391  return 1;
392  } else if (ivj && !ivi) {
393  return -1;
394  } else if (ivi && ivj) {
395  ivi = static_cast<unsigned int>(dp_atoms[i].typeOfStereoGroup);
396  ivj = static_cast<unsigned int>(dp_atoms[j].typeOfStereoGroup);
397  if (ivi < ivj) {
398  return -1;
399  } else if (ivi > ivj) {
400  return 1;
401  }
402  ivi = dp_atoms[i].whichStereoGroup - 1;
403  ivj = dp_atoms[j].whichStereoGroup - 1;
404  // now check the current classes of the other members of the SG
405  std::set<unsigned int> sgi;
406  for (auto sgat : dp_mol->getStereoGroups()[ivi].getAtoms()) {
407  sgi.insert(dp_atoms[sgat->getIdx()].index);
408  }
409  std::set<unsigned int> sgj;
410  for (auto sgat : dp_mol->getStereoGroups()[ivj].getAtoms()) {
411  sgj.insert(dp_atoms[sgat->getIdx()].index);
412  }
413  if (sgi < sgj) {
414  return -1;
415  } else if (sgi > sgj) {
416  return 1;
417  }
418  }
419  } else {
420  // if there's no stereogroup, then use whatever atom stereochem is
421  // specfied:
422  ivi = 0;
423  ivj = 0;
424  // can't actually use values here, because they are arbitrary
425  ivi = dp_atoms[i].atom->getChiralTag() != 0;
426  ivj = dp_atoms[j].atom->getChiralTag() != 0;
427  if (ivi < ivj) {
428  return -1;
429  } else if (ivi > ivj) {
430  return 1;
431  }
432  // stereo set
433  if (ivi && ivj) {
434  if (ivi) {
435  ivi = getChiralRank(dp_mol, dp_atoms, i);
436  }
437  if (ivj) {
438  ivj = getChiralRank(dp_mol, dp_atoms, j);
439  }
440  if (ivi < ivj) {
441  return -1;
442  } else if (ivi > ivj) {
443  return 1;
444  }
445  }
446  }
447  }
448 
449  if (df_useChiralityRings) {
450  // ring stereochemistry
451  ivi = getAtomRingNbrCode(i);
452  ivj = getAtomRingNbrCode(j);
453  if (ivi < ivj) {
454  return -1;
455  } else if (ivi > ivj) {
456  return 1;
457  } // bond stereo is taken care of in the neighborhood comparison
458  }
459  return 0;
460  }
461 
462  public:
463  Canon::canon_atom *dp_atoms{nullptr};
464  const ROMol *dp_mol{nullptr};
465  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
466  *dp_bondsInPlay{nullptr};
467  bool df_useNbrs{false};
468  bool df_useIsotopes{true};
469  bool df_useChirality{true};
470  bool df_useChiralityRings{true};
471 
474  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
475  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
476  : dp_atoms(atoms),
477  dp_mol(&m),
478  dp_atomsInPlay(atomsInPlay),
479  dp_bondsInPlay(bondsInPlay),
480  df_useNbrs(false),
481  df_useIsotopes(true),
482  df_useChirality(true),
483  df_useChiralityRings(true) {}
484  int operator()(int i, int j) const {
485  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
486  return 0;
487  }
488  int v = basecomp(i, j);
489  if (v) {
490  return v;
491  }
492 
493  if (df_useNbrs) {
494  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
495  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
496  }
497  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
498  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
499  }
500 
501  for (unsigned int ii = 0;
502  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
503  ++ii) {
504  int cmp =
505  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
506  if (cmp) {
507  return cmp;
508  }
509  }
510 
511  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
512  return -1;
513  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
514  return 1;
515  }
516  }
517  return 0;
518  }
519 };
520 
521 /*
522  * A compare function to discriminate chiral atoms, similar to the CIP rules.
523  * This functionality is currently not used.
524  */
525 
526 const unsigned int ATNUM_CLASS_OFFSET = 10000;
528  void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
529  for (unsigned j = 0; j < nbrs.size(); ++j) {
530  unsigned int nbrIdx = nbrs[j].nbrIdx;
531  if (nbrIdx == ATNUM_CLASS_OFFSET) {
532  // Ignore the Hs
533  continue;
534  }
535  const Atom *nbr = dp_atoms[nbrIdx].atom;
536  nbrs[j].nbrSymClass =
537  nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
538  }
539  std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
540  // FIX: don't want to be doing this long-term
541  }
542 
543  int basecomp(int i, int j) const {
544  PRECONDITION(dp_atoms, "no atoms");
545  unsigned int ivi, ivj;
546 
547  // always start with the current class:
548  ivi = dp_atoms[i].index;
549  ivj = dp_atoms[j].index;
550  if (ivi < ivj) {
551  return -1;
552  } else if (ivi > ivj) {
553  return 1;
554  }
555 
556  // move onto atomic number
557  ivi = dp_atoms[i].atom->getAtomicNum();
558  ivj = dp_atoms[j].atom->getAtomicNum();
559  if (ivi < ivj) {
560  return -1;
561  } else if (ivi > ivj) {
562  return 1;
563  }
564 
565  // isotopes:
566  ivi = dp_atoms[i].atom->getIsotope();
567  ivj = dp_atoms[j].atom->getIsotope();
568  if (ivi < ivj) {
569  return -1;
570  } else if (ivi > ivj) {
571  return 1;
572  }
573 
574  // atom stereochem:
575  ivi = 0;
576  ivj = 0;
577  std::string cipCode;
578  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
579  cipCode)) {
580  ivi = cipCode == "R" ? 2 : 1;
581  }
582  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
583  cipCode)) {
584  ivj = cipCode == "R" ? 2 : 1;
585  }
586  if (ivi < ivj) {
587  return -1;
588  } else if (ivi > ivj) {
589  return 1;
590  }
591 
592  // bond stereo is taken care of in the neighborhood comparison
593  return 0;
594  }
595 
596  public:
597  Canon::canon_atom *dp_atoms{nullptr};
598  const ROMol *dp_mol{nullptr};
599  bool df_useNbrs{false};
602  : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false) {}
603  int operator()(int i, int j) const {
604  PRECONDITION(dp_atoms, "no atoms");
605  PRECONDITION(dp_mol, "no molecule");
606  PRECONDITION(i != j, "bad call");
607  int v = basecomp(i, j);
608  if (v) {
609  return v;
610  }
611 
612  if (df_useNbrs) {
613  getAtomNeighborhood(dp_atoms[i].bonds);
614  getAtomNeighborhood(dp_atoms[j].bonds);
615 
616  // we do two passes through the neighbor lists. The first just uses the
617  // atomic numbers (by passing the optional 10000 to bondholder::compare),
618  // the second takes the already-computed index into account
619  for (unsigned int ii = 0;
620  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
621  ++ii) {
622  int cmp = bondholder::compare(
623  dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
624  if (cmp) {
625  return cmp;
626  }
627  }
628  for (unsigned int ii = 0;
629  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
630  ++ii) {
631  int cmp =
632  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
633  if (cmp) {
634  return cmp;
635  }
636  }
637  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
638  return -1;
639  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
640  return 1;
641  }
642  }
643  return 0;
644  }
645 };
646 
647 /*
648  * Basic canonicalization function to organize the partitions which will be
649  * sorted next.
650  * */
651 
652 template <typename CompareFunc>
653 void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
654  int mode, int *order, int *count, int &activeset,
655  int *next, int *changed, char *touchedPartitions) {
656  unsigned int nAtoms = mol.getNumAtoms();
657  int partition;
658  int symclass = 0;
659  int *start;
660  int offset;
661  int index;
662  int len;
663  int i;
664  // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
665 
666  // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
667  while (activeset != -1) {
668  // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
669  // std::cerr<<" next: ";
670  // for(unsigned int ii=0;ii<nAtoms;++ii){
671  // std::cerr<<ii<<":"<<next[ii]<<" ";
672  // }
673  // std::cerr<<std::endl;
674  // for(unsigned int ii=0;ii<nAtoms;++ii){
675  // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
676  // "<<atoms[order[ii]].index<<std::endl;
677  // }
678 
679  partition = activeset;
680  activeset = next[partition];
681  next[partition] = -2;
682 
683  len = count[partition];
684  offset = atoms[partition].index;
685  start = order + offset;
686  // std::cerr<<"\n\n**************************************************************"<<std::endl;
687  // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
688  // for(unsigned int ii=0;ii<len;++ii){
689  // std::cerr<<" "<<order[offset+ii]+1;
690  // }
691  // std::cerr<<std::endl;
692  // for(unsigned int ii=0;ii<nAtoms;++ii){
693  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
694  // "<<atoms[order[ii]].index<<std::endl;
695  // }
696  hanoisort(start, len, count, changed, compar);
697  // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
698  // std::cerr<<" result:";
699  // for(unsigned int ii=0;ii<nAtoms;++ii){
700  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
701  // "<<atoms[order[ii]].index<<std::endl;
702  // }
703  for (int k = 0; k < len; ++k) {
704  changed[start[k]] = 0;
705  }
706 
707  index = start[0];
708  // std::cerr<<" len:"<<len<<" index:"<<index<<"
709  // count:"<<count[index]<<std::endl;
710  for (i = count[index]; i < len; i++) {
711  index = start[i];
712  if (count[index]) {
713  symclass = offset + i;
714  }
715  atoms[index].index = symclass;
716  // std::cerr<<" "<<index+1<<"("<<symclass<<")";
717  // if(mode && (activeset<0 || count[index]>count[activeset]) ){
718  // activeset=index;
719  //}
720  for (unsigned j = 0; j < atoms[index].degree; ++j) {
721  changed[atoms[index].nbrIds[j]] = 1;
722  }
723  }
724  // std::cerr<<std::endl;
725 
726  if (mode) {
727  index = start[0];
728  for (i = count[index]; i < len; i++) {
729  index = start[i];
730  for (unsigned j = 0; j < atoms[index].degree; ++j) {
731  unsigned int nbor = atoms[index].nbrIds[j];
732  touchedPartitions[atoms[nbor].index] = 1;
733  }
734  }
735  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
736  if (touchedPartitions[ii]) {
737  partition = order[ii];
738  if ((count[partition] > 1) && (next[partition] == -2)) {
739  next[partition] = activeset;
740  activeset = partition;
741  }
742  touchedPartitions[ii] = 0;
743  }
744  }
745  }
746  }
747 } // end of RefinePartitions()
748 
749 template <typename CompareFunc>
750 void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
751  int mode, int *order, int *count, int &activeset, int *next,
752  int *changed, char *touchedPartitions) {
753  unsigned int nAtoms = mol.getNumAtoms();
754  int partition;
755  int offset;
756  int index;
757  int len;
758  int oldPart = 0;
759 
760  for (unsigned int i = 0; i < nAtoms; i++) {
761  partition = order[i];
762  oldPart = atoms[partition].index;
763  while (count[partition] > 1) {
764  len = count[partition];
765  offset = atoms[partition].index + len - 1;
766  index = order[offset];
767  atoms[index].index = offset;
768  count[partition] = len - 1;
769  count[index] = 1;
770 
771  // test for ions, water molecules with no
772  if (atoms[index].degree < 1) {
773  continue;
774  }
775  for (unsigned j = 0; j < atoms[index].degree; ++j) {
776  unsigned int nbor = atoms[index].nbrIds[j];
777  touchedPartitions[atoms[nbor].index] = 1;
778  changed[nbor] = 1;
779  }
780 
781  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
782  if (touchedPartitions[ii]) {
783  int npart = order[ii];
784  if ((count[npart] > 1) && (next[npart] == -2)) {
785  next[npart] = activeset;
786  activeset = npart;
787  }
788  touchedPartitions[ii] = 0;
789  }
790  }
791  RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
792  changed, touchedPartitions);
793  }
794  // not sure if this works each time
795  if (atoms[partition].index != oldPart) {
796  i -= 1;
797  }
798  }
799 } // end of BreakTies()
800 
802  int *order, int *count,
803  canon_atom *atoms);
804 
805 RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order,
806  int *count, int &activeset,
807  int *next, int *changed);
808 
810  std::vector<unsigned int> &res,
811  bool breakTies = true,
812  bool includeChirality = true,
813  bool includeIsotopes = true);
814 
816  const ROMol &mol, std::vector<unsigned int> &res,
817  const boost::dynamic_bitset<> &atomsInPlay,
818  const boost::dynamic_bitset<> &bondsInPlay,
819  const std::vector<std::string> *atomSymbols,
820  const std::vector<std::string> *bondSymbols, bool breakTies,
821  bool includeChirality, bool includeIsotope);
822 
823 inline void rankFragmentAtoms(
824  const ROMol &mol, std::vector<unsigned int> &res,
825  const boost::dynamic_bitset<> &atomsInPlay,
826  const boost::dynamic_bitset<> &bondsInPlay,
827  const std::vector<std::string> *atomSymbols = nullptr,
828  bool breakTies = true, bool includeChirality = true,
829  bool includeIsotopes = true) {
830  rankFragmentAtoms(mol, res, atomsInPlay, bondsInPlay, atomSymbols, nullptr,
831  breakTies, includeChirality, includeIsotopes);
832 };
833 
835  std::vector<unsigned int> &res);
836 
838  std::vector<Canon::canon_atom> &atoms,
839  bool includeChirality = true);
840 
841 namespace detail {
843  std::vector<Canon::canon_atom> &atoms,
844  bool includeChirality,
845  const std::vector<std::string> *atomSymbols,
846  const std::vector<std::string> *bondSymbols,
847  const boost::dynamic_bitset<> &atomsInPlay,
848  const boost::dynamic_bitset<> &bondsInPlay,
849  bool needsInit);
850 void freeCanonAtoms(std::vector<Canon::canon_atom> &atoms);
851 template <typename T>
852 void rankWithFunctor(T &ftor, bool breakTies, int *order,
853  bool useSpecial = false, bool useChirality = false,
854  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
855  const boost::dynamic_bitset<> *bondsInPlay = nullptr);
856 
857 } // namespace detail
858 
859 } // namespace Canon
860 } // namespace RDKit
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
The class for representing atoms.
Definition: Atom.h:68
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:126
BondType
the type of Bond
Definition: Bond.h:56
BondStereo
the nature of the bond's stereochem (for cis/trans)
Definition: Bond.h:95
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:473
int operator()(int i, int j) const
Definition: new_canon.h:484
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition: new_canon.h:601
int operator()(int i, int j) const
Definition: new_canon.h:603
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:147
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:204
unsigned int getNumAtoms() const
returns our number of atoms
Definition: ROMol.h:415
const std::vector< StereoGroup > & getStereoGroups() const
Gets a reference to the groups of atoms with relative stereochemistry.
Definition: ROMol.h:781
CXXAtomIterator< const MolGraph, Atom *const, MolGraph::adjacency_iterator > atomNeighbors(Atom const *at) const
Definition: ROMol.h:284
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:225
void rankWithFunctor(T &ftor, bool breakTies, int *order, bool useSpecial=false, bool useChirality=false, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
void initFragmentCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, bool needsInit)
void freeCanonAtoms(std::vector< Canon::canon_atom > &atoms)
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int >> &result)
RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true)
RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const unsigned int ATNUM_CLASS_OFFSET
Definition: new_canon.h:526
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:750
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:653
RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, bool breakTies, bool includeChirality, bool includeIsotope)
RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
RDKIT_RDGENERAL_EXPORT const std::string _CIPCode
RDKIT_RDGENERAL_EXPORT const std::string molAtomMapNumber
Std stuff.
Definition: Abbreviations.h:19
StereoGroupType
Definition: StereoGroup.h:29
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition: hanoiSort.h:151
unsigned int countSwapsToInterconvert(const T &ref, T probe)
Definition: utils.h:54
const std::string * p_symbol
Definition: new_canon.h:41
Bond::BondType bondType
Definition: new_canon.h:34
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition: new_canon.h:64
bool operator<(const bondholder &o) const
Definition: new_canon.h:63
int compareStereo(const bondholder &o) const
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition: new_canon.h:53
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition: new_canon.h:46
unsigned int bondStereo
Definition: new_canon.h:35
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition: new_canon.h:68
unsigned int nbrSymClass
Definition: new_canon.h:37
std::vector< bondholder > bonds
Definition: new_canon.h:114
std::vector< int > revistedNeighbors
Definition: new_canon.h:113
std::vector< int > neighborNum
Definition: new_canon.h:112