24 #include <unordered_map> 37 #include "dirtyAllocator.h" 38 #include "operators.h" 40 #include "marginalTrek++.h" 41 #include "isoSpec++.h" 43 #include "element_tables.h" 53 const int* _isotopeNumbers,
54 const int* _atomCounts,
55 const double*
const * _isotopeMasses,
56 const double*
const * _isotopeProbabilities
59 dimNumber(_dimNumber),
60 isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
61 atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
62 confSize(_dimNumber * sizeof(int)),
68 setupMarginals(_isotopeMasses, _isotopeProbabilities);
88 other.disowned =
true;
93 disowned(fullcopy ? throw
std::logic_error(
"Not implemented") : true),
104 inline void Iso::setupMarginals(
const double*
const * _isotopeMasses,
const double*
const * _isotopeProbabilities)
117 _isotopeProbabilities[ii],
157 mass +=
marginals[ii]->getLightestConfMass();
165 mass +=
marginals[ii]->getHeaviestConfMass();
177 std::vector<const double*> isotope_masses;
178 std::vector<const double*> isotope_probabilities;
182 setupMarginals(isotope_masses.data(), isotope_probabilities.data());
185 unsigned int parse_formula(
const char* formula, std::vector<const double*>& isotope_masses, std::vector<const double*>& isotope_probabilities,
int**
isotopeNumbers,
int**
atomCounts,
unsigned int*
confSize)
188 size_t slen = strlen(formula);
192 std::vector<std::pair<const char*, size_t> > elements;
193 std::vector<int> numbers;
196 throw invalid_argument(
"Invalid formula: can't be empty");
198 if(!isdigit(formula[slen-1]))
199 throw invalid_argument(
"Invalid formula: every element must be followed by a number - write H2O1 and not H2O for water");
201 for(
size_t ii=0; ii<slen; ii++)
202 if(!isdigit(formula[ii]) && !isalpha(formula[ii]))
203 throw invalid_argument(
"Ivalid formula: contains invalid (non-digit, non-alpha) character");
207 size_t digit_end = 0;
209 while(position < slen)
212 while(isalpha(formula[elem_end]))
214 digit_end = elem_end;
215 while(isdigit(formula[digit_end]))
217 elements.emplace_back(&formula[position], elem_end-position);
218 numbers.push_back(atoi(&formula[elem_end]));
219 position = digit_end;
222 std::vector<int> element_indexes;
224 for (
unsigned int i=0; i<elements.size(); i++)
227 for(
int j=0; j<ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES; j++)
229 if ((strlen(elem_table_symbol[j]) == elements[i].second) && (strncmp(elements[i].first, elem_table_symbol[j], elements[i].second) == 0))
236 throw invalid_argument(
"Invalid formula");
237 element_indexes.push_back(idx);
240 vector<int> _isotope_numbers;
242 for(vector<int>::iterator it = element_indexes.begin(); it != element_indexes.end(); ++it)
246 int atomicNo = elem_table_atomicNo[at_idx];
247 while(at_idx < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES && elem_table_atomicNo[at_idx] == atomicNo)
252 _isotope_numbers.push_back(num);
255 for(vector<int>::iterator it = element_indexes.begin(); it != element_indexes.end(); ++it)
257 isotope_masses.push_back(&elem_table_mass[*it]);
258 isotope_probabilities.push_back(&elem_table_probability[*it]);
261 const unsigned int dimNumber = elements.size();
263 *isotopeNumbers = array_copy<int>(_isotope_numbers.data(),
dimNumber);
264 *atomCounts = array_copy<int>(numbers.data(),
dimNumber);
265 *confSize = dimNumber *
sizeof(int);
280 partialLProbs(alloc_partials ? new double[
dimNumber+1] : nullptr),
281 partialMasses(alloc_partials ? new double[
dimNumber+1] : nullptr),
282 partialProbs(alloc_partials ? new double[
dimNumber+1] : nullptr)
313 Lcutoff(_threshold <= 0.0 ?
std::numeric_limits<double>::lowest() : (_absolute ? log(_threshold) : log(_threshold) +
modeLProb))
330 if(!marginalResultsUnsorted[ii]->inRange(0))
334 if(reorder_marginals)
337 int* tmpMarginalOrder =
new int[
dimNumber];
340 tmpMarginalOrder[ii] = ii;
342 std::sort(tmpMarginalOrder, tmpMarginalOrder + dimNumber, comparator);
346 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
350 marginalOrder[tmpMarginalOrder[ii]] = ii;
352 delete[] tmpMarginalOrder;
357 marginalResults = marginalResultsUnsorted;
358 marginalOrder =
nullptr;
366 for(
int ii=1; ii<dimNumber-1; ii++)
367 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->
getModeLProb();
369 lProbs_ptr = lProbs_ptr_start;
372 partialLProbs_second++;
383 lcfmsv = std::numeric_limits<double>::infinity();
395 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
398 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->
get_no_confs()-1;
421 memset(counter, 0,
sizeof(
int)*
dimNumber);
425 lProbs_ptr = lProbs_ptr_start - 1;
444 logProbs =
new const vector<double>*[
dimNumber];
445 masses =
new const vector<double>*[
dimNumber];
446 marginalConfs =
new const vector<int*>*[
dimNumber];
450 masses[i] = &marginalResults[i]->conf_masses();
451 logProbs[i] = &marginalResults[i]->conf_lprobs();
452 marginalConfs[i] = &marginalResults[i]->confs();
455 topConf = allocator.newConf();
457 reinterpret_cast<char*>(topConf) +
sizeof(
double),
459 sizeof(
int)*dimNumber
462 *(
reinterpret_cast<double*
>(topConf)) =
476 dealloc_table<MarginalTrek*>(marginalResults,
dimNumber);
479 delete[] marginalConfs;
495 int* topConfIsoCounts = getConf(topConf);
497 currentLProb = *(
reinterpret_cast<double*
>(topConf));
498 currentMass = combinedSum( topConfIsoCounts, masses,
dimNumber );
499 currentProb = exp(currentLProb);
504 if(marginalResults[j]->probeConfigurationIdx(topConfIsoCounts[j] + 1))
508 topConfIsoCounts[j]++;
509 *(
reinterpret_cast<double*
>(topConf)) = combinedSum(topConfIsoCounts, logProbs, dimNumber);
511 topConfIsoCounts[j]--;
516 void* acceptedCandidate = allocator.newConf();
517 int* acceptedCandidateIsoCounts = getConf(acceptedCandidate);
518 memcpy(acceptedCandidateIsoCounts, topConfIsoCounts, confSize);
520 acceptedCandidateIsoCounts[j]++;
522 *(
reinterpret_cast<double*
>(acceptedCandidate)) = combinedSum(acceptedCandidateIsoCounts, logProbs, dimNumber);
524 pq.push(acceptedCandidate);
527 if(topConfIsoCounts[j] > 0)
531 topConfIsoCounts[ccount]++;
545 #if !ISOSPEC_BUILDING_R 547 void printConfigurations(
548 const std::tuple<double*,double*,int*,int>& results,
554 for(
int i=0; i<std::get<3>(results); i++){
556 std::cout <<
"Mass = " << std::get<0>(results)[i] <<
557 "\tand log-prob = " << std::get<1>(results)[i] <<
558 "\tand prob = " << exp(std::get<1>(results)[i]) <<
559 "\tand configuration =\t";
563 for(
int k=0; k<isotopeNumbers[j]; k++ )
565 std::cout << std::get<2>(results)[m] <<
" ";
572 std::cout << std::endl;
580 IsoLayeredGenerator::IsoLayeredGenerator(
Iso&& iso,
581 double _targetCoverage,
582 double _percentageToExpand,
587 allocator(dimNumber, _tabSize),
588 candidate(
new int[dimNumber]),
589 targetCoverage(_targetCoverage >= 1.0 ? 10000.0 : _targetCoverage),
592 percentageToExpand(_percentageToExpand),
595 generator_position(-1)
602 logProbs =
new const vector<double>*[
dimNumber];
603 masses =
new const vector<double>*[
dimNumber];
604 marginalConfs =
new const vector<int*>*[
dimNumber];
608 masses[i] = &marginalResults[i]->conf_masses();
609 logProbs[i] = &marginalResults[i]->conf_lprobs();
610 marginalConfs[i] = &marginalResults[i]->confs();
613 void* topConf = allocator.newConf();
614 memset(reinterpret_cast<char*>(topConf) +
sizeof(
double), 0,
sizeof(
int)*dimNumber);
615 *(
reinterpret_cast<double*
>(topConf)) = combinedSum(getConf(topConf), logProbs, dimNumber);
617 current =
new std::vector<void*>();
618 next =
new std::vector<void*>();
620 current->push_back(topConf);
622 lprobThr = (*
reinterpret_cast<double*
>(topConf));
624 if(targetCoverage > 0.0)
625 while(advanceToNextLayer()) {};
629 IsoLayeredGenerator::~IsoLayeredGenerator()
631 if(current !=
nullptr)
637 delete[] marginalConfs;
639 dealloc_table(marginalResults, dimNumber);
642 bool IsoLayeredGenerator::advanceToNextLayer()
645 double maxFringeLprob = -std::numeric_limits<double>::infinity();
647 if(current ==
nullptr)
649 int accepted_in_this_layer = 0;
650 Summator prob_in_this_layer(totalProb);
654 while(current->size() > 0)
656 topConf = current->back();
659 double top_lprob = getLProb(topConf);
661 if(top_lprob >= lprobThr)
663 newaccepted.push_back(topConf);
664 accepted_in_this_layer++;
665 prob_in_this_layer.add(exp(top_lprob));
669 next->push_back(topConf);
673 int* topConfIsoCounts = getConf(topConf);
679 if(marginalResults[j]->probeConfigurationIdx(topConfIsoCounts[j] + 1))
681 memcpy(candidate, topConfIsoCounts, confSize);
684 void* acceptedCandidate = allocator.newConf();
685 int* acceptedCandidateIsoCounts = getConf(acceptedCandidate);
686 memcpy( acceptedCandidateIsoCounts, candidate, confSize);
688 double newConfProb = combinedSum(
696 *(
reinterpret_cast<double*
>(acceptedCandidate)) = newConfProb;
698 if(newConfProb >= lprobThr)
699 current->push_back(acceptedCandidate);
702 next->push_back(acceptedCandidate);
703 if(newConfProb > maxFringeLprob)
704 maxFringeLprob = top_lprob;
707 if(topConfIsoCounts[j] > 0)
712 if(next ==
nullptr || next->size() < 1)
716 if(prob_in_this_layer.get() < targetCoverage)
718 std::vector<void*>* nnew = current;
722 int howmany = floor(current->size()*percentageToExpand);
723 lprobThr = getLProb(
quickselect(current->data(), howmany, 0, current->size()));
724 totalProb = prob_in_this_layer;
733 int end = accepted_in_this_layer - 1;
738 void** lastLayer = &(newaccepted.data()[newaccepted.size()-accepted_in_this_layer]);
741 while(totalProb.get() < targetCoverage)
748 int len = end - start;
749 #if ISOSPEC_BUILDING_R 750 int pivot = len/2 + start;
752 int pivot = rand() % len + start;
754 void* pval = lastLayer[pivot];
755 double pprob = getLProb(pval);
756 mswap(lastLayer[pivot], lastLayer[end-1]);
757 int loweridx = start;
758 for(
int i=start; i<end-1; i++)
760 if(getLProb(lastLayer[i]) > pprob)
762 mswap(lastLayer[i], lastLayer[loweridx]);
766 mswap(lastLayer[end-1], lastLayer[loweridx]);
771 for(
int i=start; i<=loweridx; i++)
773 leftProb.add(exp(getLProb(lastLayer[i])));
775 if(leftProb.get() < targetCoverage)
783 int accend = newaccepted.size()-accepted_in_this_layer+start+1;
786 newaccepted.resize(accend);
791 totalProb = prob_in_this_layer;
802 generator_position++;
803 if(generator_position < newaccepted.size())
805 partialLProbs[0] = getLProb(newaccepted[generator_position]);
806 partialMasses[0] = combinedSum(getConf(newaccepted[generator_position]), masses, dimNumber);
The generator of isotopologues.
The Iso class for the calculation of the isotopic distribution.
The marginal distribution class (a subisotopologue).
double getLightestPeakMass() const
Get the mass of the lightest peak in the isotopic distribution.
IsoThresholdGenerator(Iso &&iso, double _threshold, bool _absolute=true, int _tabSize=1000, int _hashSize=1000, bool reorder_marginals=true)
The move-constructor.
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
virtual ~IsoGenerator()
Destructor.
virtual ~IsoOrderedGenerator()
Destructor.
double getHeaviestPeakMass() const
Get the mass of the heaviest peak in the isotopic distribution.
IsoOrderedGenerator(Iso &&iso, int _tabSize=1000, int _hashSize=1000)
The move-contstructor.
bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
double getModeLProb() const
Get the log-probability of the mode-configuration (if there are many modes, they share this value)...
virtual ~Iso()
Destructor.
const double * get_lProbs_ptr() const
Get the table of the log-probabilities of subisotopologues.
unsigned int get_no_confs() const
Get the number of precomputed subisotopologues.
double getModeLProb() const
Get the log-probability of the mode subisotopologue.
void * quickselect(void **array, int n, int start, int end)
Quickly select the n'th positional statistic, including the weights.
Iso(int _dimNumber, const int *_isotopeNumbers, const int *_atomCounts, const double *const *_isotopeMasses, const double *const *_isotopeProbabilities)
General constructror.
bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
IsoGenerator(Iso &&iso, bool alloc_partials=true)
Move constructor.
The marginal distribution class (a subisotopologue).
void terminate_search()
Block the subsequent search of isotopologues.
Precalculated Marginal class.