GCC Code Coverage Report


source/XpertMassCore/src/
File: source/XpertMassCore/src/IsotopicClusterGenerator.cpp
Date: 2025-11-20 01:41:33
Lines:
0/200
0.0%
Functions:
0/24
0.0%
Branches:
0/178
0.0%

Line Branch Exec Source
1 /* BEGIN software license
2 *
3 * MsXpertSuite - mass spectrometry software suite
4 * -----------------------------------------------
5 * Copyright (C) 2009--2020 Filippo Rusconi
6 *
7 * http://www.msxpertsuite.org
8 *
9 * This file is part of the MsXpertSuite project.
10 *
11 * The MsXpertSuite project is the successor of the massXpert project. This
12 * project now includes various independent modules:
13 *
14 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
15 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16 *
17 * This program is free software: you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation, either version 3 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program. If not, see <http://www.gnu.org/licenses/>.
29 *
30 * END software license
31 */
32
33
34 /////////////////////// Std lib includes
35 #include <memory>
36
37 /////////////////////// Qt includes
38 #include <QDebug>
39
40
41 /////////////////////// IsoSpec
42 #include <IsoSpec++/isoSpec++.h>
43 #include <IsoSpec++/element_tables.h>
44
45
46 // extern const int elem_table_atomicNo[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
47 // extern const double
48 // elem_table_probability[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
49 // extern const double elem_table_mass[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
50 // extern const int elem_table_massNo[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
51 // extern const int
52 // elem_table_extraNeutrons[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
53 // extern const char* elem_table_element[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
54 // extern const char* elem_table_symbol[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
55 // extern const bool elem_table_Radioactive[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
56 // extern const double
57 // elem_table_log_probability[ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES];
58
59 /////////////////////// pappsomspp includes
60 #include <pappsomspp/core/trace/trace.h>
61 #include <pappsomspp/core/types.h>
62
63
64 /////////////////////// Local includes
65 #include "MsXpS/libXpertMassCore/globals.hpp"
66 #include "MsXpS/libXpertMassCore/Formula.hpp"
67 #include "MsXpS/libXpertMassCore/IsotopicClusterGenerator.hpp"
68 #include "MsXpS/libXpertMassCore/IsotopicDataLibraryHandler.hpp"
69 #include "MsXpS/libXpertMassCore/IsotopicDataUserConfigHandler.hpp"
70 #include "MsXpS/libXpertMassCore/IsotopicDataManualConfigHandler.hpp"
71
72
73 namespace MsXpS
74 {
75 namespace libXpertMassCore
76 {
77
78
79 /*!
80 \class MsXpS::libXpertMassCore::IsotopicClusterGenerator
81 \inmodule libXpertMassCore
82 \ingroup XpertMassCoreMassCalculations
83 \inheaderfile IsotopicClusterGenerator.hpp
84
85 \brief The IsotopicClusterGenerator class provides the features needed to model
86 isotopic clusters starting from (elemental-composition, charge) pairs.
87
88 The modelling uses the member isotopic data. The generated isotopic clusters
89 only contain cluster centroid peaks. If peaks should have a profile, then they
90 need to be shaped.
91
92 \sa IsotopicClusterShaper, MassPeakShaper
93 */
94
95 /*!
96 \enum MsXpS::libXpertMassCore::IsotopicDataType
97
98 This enum specifies the type of isotopic data.
99
100 \value NOT_SET: Not configured.
101 .
102 \value LIBRARY_CONFIG: The isotopic data are loaded intact from the IsoSpec
103 library data and are considered pristine natural abundance data.
104 .
105 \value USER_CONFIG: The isotopic data are in the same format as for
106 LIBRARY_CONFIG but might have been modified by the user to configure new
107 abundances.
108 .
109 \value MANUAL_CONFIG: The isotopic data are in a specific format, different
110 than the two above, that actually crafts the isotopes starting from scratch.
111 */
112
113 /*!
114 \typealias MsXpS::libXpertMassCore::FormulaChargePair
115
116 Alias for std::pair<QString, int>.
117 */
118
119 /*!
120 \typealias MsXpS::libXpertMassCore::IsotopicClusterChargePair
121
122 Alias for std::pair<pappso::TraceCstSPtr, int>.
123 */
124
125 /*!
126 \typedef MsXpS::libXpertMassCore::IsotopicClusterGeneratorSPtr
127 \relates IsotopicClusterGenerator.
128
129 Synonym for std::shared_ptr<IsotopicClusterGenerator>.
130 */
131
132 /*!
133 \typedef MsXpS::libXpertMassCore::IsotopicClusterGeneratorCstSPtr
134 \relates IsotopicClusterGenerator.
135
136 Synonym for std::shared_ptr<const IsotopicClusterGenerator>.
137 */
138
139 /*!
140 \variable MsXpS::libXpertMassCore::IsotopicClusterGenerator::msp_isotopicData
141
142 \brief The isotopic data needed for the computations.
143 */
144
145 /*!
146 \variable MsXpS::libXpertMassCore::IsotopicClusterGenerator::m_isotopicDataType
147
148 \brief The \l IsotopicDataType type of data.
149 */
150
151 /*!
152 \variable MsXpS::libXpertMassCore::IsotopicClusterGenerator::m_maxSummedProbability
153
154 \brief The summed probability of all the isotopic cluster peaks. The
155 computation stops when this probability is reached.
156 */
157
158 /*!
159 \variable MsXpS::libXpertMassCore::IsotopicClusterGenerator::m_normalizeIntensity
160
161 \brief The most intense cluster peak's intensity that is used to normalize all
162 the other cluster peaks.
163 */
164
165 /*!
166 \variable MsXpS::libXpertMassCore::IsotopicClusterGenerator::m_sortType
167
168 \brief The type of sorting required for the generated cluster peak centroids.
169 */
170
171 /*!
172 \variable MsXpS::libXpertMassCore::IsotopicClusterGenerator::m_sortOrder
173
174 \brief The order of the sorting for the generated cluster peak centroids.
175 */
176
177 /*!
178 \variable MsXpS::libXpertMassCore::IsotopicClusterGenerator::m_formulaChargePairs
179
180 \brief The set of (elemental composition, charge) pairs.
181 */
182
183 /*!
184 \variable MsXpS::libXpertMassCore::IsotopicClusterGenerator::m_isotopicClusterChargePairs
185
186 \brief The set of (isotopic cluster, charge) pairs.
187 */
188
189
190
191 /*!
192 \brief Constructs a IsotopicClusterGenerator instance.
193 */
194 IsotopicClusterGenerator::IsotopicClusterGenerator()
195 {
196 }
197
198 /*!
199 \brief Constructs a IsotopicClusterGenerator instance.
200
201 \list
202 \li \a isotopic_data_sp: The isotopic data used for the calculations.
203 \endlist
204 */
205 IsotopicClusterGenerator::IsotopicClusterGenerator(
206 libXpertMassCore::IsotopicDataSPtr isotopic_data_sp)
207 : msp_isotopicData(isotopic_data_sp)
208 {
209 }
210
211 /*!
212 \brief Destructs this IsotopicClusterGenerator instance.
213 */
214 IsotopicClusterGenerator::~IsotopicClusterGenerator()
215 {
216 // qDebug();
217 }
218
219 /*!
220 \brief Sets the isotopic data type to \a isotopic_data_type.
221 */
222 void
223 IsotopicClusterGenerator::setIsotopicDataType(
224 IsotopicDataType isotopic_data_type)
225 {
226 m_isotopicDataType = isotopic_data_type;
227 }
228
229
230 /*!
231 \brief Sets the isotopic data to \a isotopic_data_sp.
232 */
233 void
234 IsotopicClusterGenerator::setIsotopicData(
235 libXpertMassCore::IsotopicDataSPtr isotopic_data_sp)
236 {
237 msp_isotopicData = isotopic_data_sp;
238 }
239
240
241 /*!
242 \brief Returns the isotopic data.
243 */
244 libXpertMassCore::IsotopicDataSPtr
245 IsotopicClusterGenerator::getIsotopicData() const
246 {
247 return msp_isotopicData;
248 }
249
250
251 /*!
252 \brief Adds the (elemental composition, charge) pair \a
253 formula_charge_pair to the member list of FormulaChargePair instances.
254
255 The member list of FormulaChargePair instances is first cleared.
256 */
257 void
258 IsotopicClusterGenerator::setFormulaChargePair(
259 FormulaChargePair &formula_charge_pair)
260 {
261 m_formulaChargePairs.clear();
262 m_formulaChargePairs.push_back(formula_charge_pair);
263
264 // qDebug() << formula_charge_pair.first << formula_charge_pair.second;
265 }
266
267 /*!
268 \brief Adds the (elemental composition, charge) pair \a
269 formula_charge_pair to the member list of FormulaChargePair instances.
270 */
271 void
272 IsotopicClusterGenerator::appendFormulaChargePair(
273 FormulaChargePair &formula_charge_pair)
274 {
275 m_formulaChargePairs.push_back(formula_charge_pair);
276
277 // qDebug() << formula_charge_pair.first << formula_charge_pair.second;
278 }
279
280 /*!
281 \brief Adds the (elemental composition, charge) pairs \a
282 formula_charge_pairs to the member list of FormulaChargePair instances.
283
284 The member list of FormulaChargePair instances is first cleared.
285 */
286 void
287 IsotopicClusterGenerator::setFormulaChargePairs(
288 const std::vector<FormulaChargePair> &formula_charge_pairs)
289 {
290 m_formulaChargePairs.clear();
291
292 m_formulaChargePairs.assign(formula_charge_pairs.begin(),
293 formula_charge_pairs.end());
294
295 // qDebug() << "Set" << m_formulaChargePairs.size() << "formula/charge pairs";
296 // for(auto pair : m_formulaChargePairs)
297 // qDebug() << pair.first << "/" << pair.second;
298 }
299
300 /*!
301 \brief Sets the summed probability maximum value to \a max_probability.
302 */
303 void
304 IsotopicClusterGenerator::setMaxSummedProbability(double max_probability)
305 {
306 m_maxSummedProbability = max_probability;
307 }
308
309 /*!
310 \brief Sets the normalization intensity to \a normalize_intensity.
311 */
312 void
313 IsotopicClusterGenerator::setNormalizationIntensity(int normalize_intensity)
314 {
315 m_normalizeIntensity = normalize_intensity;
316 }
317
318 /*!
319 \brief Sets the \a sort_type.
320 */
321 void
322 IsotopicClusterGenerator::setSortType(pappso::Enums::SortType sort_type)
323 {
324 m_sortType = sort_type;
325 }
326
327 /*!
328 \brief Sets the \a sort_order.
329 */
330 void
331 IsotopicClusterGenerator::setSortOrder(pappso::Enums::SortOrder sort_order)
332 {
333 m_sortOrder = sort_order;
334 }
335
336 /*!
337 \brief Validates the elemental composition \a formula.
338
339 The \a formula needs to be fully indexed, that is, even an atom present only
340 once needs to be indexed with '1', like this \c H2O1.
341
342 Returns true if validation was successful, false otherwise.
343
344 \sa Formula::validate()
345 */
346 bool
347 IsotopicClusterGenerator::validateFormula(Formula &formula)
348 {
349 // qDebug() << "Checking formula:" << formula.toString()
350 //<< "against an isotopic data set of " << msp_isotopicData->size()
351 //<< "isotopes";
352
353 // Check the syntax of the formula. Note that we need an obligatory count
354 // index even for elements that are present a single time (H2O1 note the
355 // 1).
356
357 // IsoSpec requires that even single-count element be qualified with an
358 // index (H2O1)
359
360 formula.setForceCountIndex(true);
361
362 // We have to validate because the formula might be "C5H6N3-O1", in which case
363 // if would fail the simple checkSyntax() call (that call needs to be used on
364 // already split parts of a formula).
365
366 ErrorList error_list;
367
368 if(!formula.validate(
369 msp_isotopicData, true /*store atom count*/, true /*reset counts*/, &error_list))
370 return false;
371
372 return true;
373 }
374
375 /*!
376 \brief Validates all the elemental compositions in this
377 IsotopicClusterGenerator instance.
378
379 Each \l FormulaChargePair in m_formulaChargePairs is validated for its
380 elemental composition by first creating a \l Formula out of it.
381
382 Returns true if validation was successful, false otherwise.
383
384 \sa validateFormula, Formula::validate()
385 */
386 bool
387 IsotopicClusterGenerator::validateAllFormulas()
388 {
389 for(FormulaChargePair &pair : m_formulaChargePairs)
390 {
391 Formula formula(pair.first);
392
393 if(!validateFormula(formula))
394 return false;
395 }
396
397 return true;
398 }
399
400 /*!
401 \brief Runs the IsoSpec-based isotopic calculations.
402
403 \list
404
405 \li \a element_count: the number of elements in the chemical composition.
406 \li \a charge: the charge of the analyte.
407
408 \li \a per_element_isotopes_count_array_p: pointer to int array. This array
409 lists the number of isotopes that each element has. Typically, C has 2, O has 3,
410 P has 1...
411
412 \li \a per_element_symbol_count_array_p: pointer to int array. This array lists
413 the count of atoms of each element. Typically H20 will have 2 for H and 1 for O.
414
415 \li \a per_element_isotope_masses_arrays_p_p: pointer to pointer to double array
416 (array of arrays of double). Each array contains a new sub-array for each
417 symbol. The sub-array contains the isotopic masses for one of the element
418 symbols.
419
420 \li \a per_element_isotope_probs_arrays_p_p: pointer to pointer to double array
421 (array of arrays of double). Each array contains a new sub-array for each
422 symbol. The sub-array contains the isotopic probs for one of the element
423 symbols.
424
425 \endlist
426
427 Returns a pappso::Trace with the calculated isotopic cluster.
428 */
429 pappso::TraceSPtr
430 IsotopicClusterGenerator::runIsotopicDataCalculations(
431 std::size_t element_count,
432 int charge,
433 int *per_element_isotopes_count_array_p,
434 int *per_element_symbol_count_array_p,
435 double **per_element_isotope_masses_arrays_p_p,
436 double **per_element_isotope_probs_arrays_p_p)
437 {
438 // We get all the isotopic data relevant to the isotopic cluster modelling
439 // calculation as performed by the IsoSpec library.
440
441 if(per_element_isotopes_count_array_p == nullptr ||
442 per_element_symbol_count_array_p == nullptr ||
443 per_element_isotope_masses_arrays_p_p == nullptr ||
444 per_element_isotope_probs_arrays_p_p == nullptr)
445 qFatal("Programming error. The pointers cannot be nullptr.");
446
447
448 if(m_maxSummedProbability <= 0 || m_maxSummedProbability > 1)
449 {
450 qDebug() << "The maximum summed probability has an incorrect value:"
451 << m_maxSummedProbability;
452 return nullptr;
453 }
454
455 IsoSpec::IsoLayeredGenerator iso(
456 IsoSpec::Iso(element_count,
457 per_element_isotopes_count_array_p,
458 per_element_symbol_count_array_p,
459 per_element_isotope_masses_arrays_p_p,
460 per_element_isotope_probs_arrays_p_p),
461 // The three values below are from the documentation (default values in the
462 // constructor). We have added them 20230329 because we discovered that they
463 // were needed on minGW64, otherwise we would experience crashes.
464 1000,
465 1000,
466 true,
467 m_maxSummedProbability);
468
469 // qDebug() << "iso's mono peak mass:" << iso.getMonoisotopicPeakMass();
470
471 // Each time we run a calculation, we do store the results in a new
472 // IsotopicCluster.
473
474 pappso::TraceSPtr isotopic_cluster_sp = std::make_shared<pappso::Trace>();
475
476 // We store the results as std::vector<std::shared_ptr<libmass:PeakCentroid>>
477 // because we'll want to sort the values according to the user's requirements.
478
479 double effective_summed_probs = 0;
480
481 // Iterate in all the cluster configurations and output all the ones that
482 // summatively make a total probability <= to the probability set by the
483 // user.
484
485 /////////////// ATTENTION ////////////////
486 // The loop below is tricky,
487
488 while(iso.advanceToNextConfiguration())
489 {
490 double iso_prob = iso.prob();
491 double iso_mz = iso.mass() / charge;
492
493 // qDebug() << "For current configuration (charge accounted for):" <<
494 // iso_mz
495 //<< iso_prob
496 //<< "and effective_probs_sum : " << effective_summed_probs;
497
498 // Create a peak centroid and store it (remark that we change the mass
499 // of the ion into m/z because the user had set the charge corresponding
500 // to the formula for which the isotopic cluster is being computed.
501
502 isotopic_cluster_sp->push_back(pappso::DataPoint(iso_mz, iso_prob));
503
504 // qDebug() << "Pushed back new peak centroid:" << iso_mz << "/" <<
505 // iso_prob;
506
507 // We do this increment at the end of the block. Indeed, if we had set up
508 // at the top of the block, then, if the very first centroid had already a
509 // prob > m_maxSummedProbability, then we would end up with an empty
510 // cluster!
511
512 effective_summed_probs += iso_prob;
513
514 if(effective_summed_probs > m_maxSummedProbability)
515 {
516 // qDebug() << "Reached the max value: effective_summed_probs:"
517 //<< effective_summed_probs
518 //<< "and m_maxSummedProbability:" << m_maxSummedProbability
519 //<< "BREAKING.";
520 break;
521 }
522 else
523 {
524 // qDebug() << "Not yet reached the max value: effective_summed_probs
525 // : "
526 //<< effective_summed_probs
527 //<< "and m_maxSummedProbability:" << m_maxSummedProbability;
528 }
529 }
530
531 // Now perform the normalization to the Gaussian apex intensity value if
532 // so is requested by the user For this we first need to find
533 // what is the most intensity peak centroid. Then, we'll normalize against
534 // it by dividing all intensity that that most intense value and
535 // multiplying by the requested gaussian apex intensity value.
536
537 // qDebug() << "Now asking for normalization.";
538
539 // Just a debug check.
540 // qDebug() << "Before normalizing, first data point:"
541 //<< isotopic_cluster_sp->front().toString();
542
543 normalizeIntensities(isotopic_cluster_sp);
544
545 // Just a debug check.
546 // qDebug() << "After normalizing, first data point:"
547 //<< isotopic_cluster_sp->front().toString();
548
549 // Now check if the user requests to kind of sorting of the PeakCentroid
550 // instances.
551
552 sortPeakCentroids(isotopic_cluster_sp);
553
554 // qDebug() << "Now returning a cluster of size:" <<
555 // isotopic_cluster_sp->size();
556
557 return isotopic_cluster_sp;
558 }
559
560 /*!
561 \brief Calculates the isotopic cluster's peak centroids for \a
562 formula_charge_pair.
563
564 \list 1
565 \li The elemental composition formula string is converted to a \l Formula and
566 validated.
567 \li The proper isotopic data handler is allocated (\l IsotopicDataType).
568 \li The number of symbols in the elemental composition is determined.
569 \li The int arrays and arrays of double arrays are allocated.
570 \li The arrays are filled-in with \l configureIsotopicData().
571 \li The calculations are performed on these arrays with \l
572 runIsotopicDataCalculations().
573 \endlist
574
575 Returns the results of the computation in the form of a
576 IsotopicClusterChargePair instance.
577 */
578 IsotopicClusterChargePair
579 IsotopicClusterGenerator::generateIsotopicClusterCentroids(
580 FormulaChargePair formula_charge_pair)
581 {
582 // qDebug() << "Starting generation of isotopic cluster for formula:"
583 //<< formula_charge_pair.first;
584
585 Formula formula(formula_charge_pair.first);
586
587 // The check will generate useful data inside the Formula!
588 if(!validateFormula(formula))
589 return std::pair(std::make_shared<const pappso::Trace>(), 0);
590
591 // Use the correct handler!
592
593 std::unique_ptr<IsotopicDataBaseHandler> isotopic_data_handler_up = nullptr;
594
595 if(m_isotopicDataType == IsotopicDataType::LIBRARY_CONFIG)
596 {
597 isotopic_data_handler_up =
598 std::make_unique<IsotopicDataLibraryHandler>(msp_isotopicData);
599 }
600 else if(m_isotopicDataType == IsotopicDataType::USER_CONFIG)
601 {
602 isotopic_data_handler_up =
603 std::make_unique<IsotopicDataUserConfigHandler>(msp_isotopicData);
604 }
605 else if(m_isotopicDataType == IsotopicDataType::MANUAL_CONFIG)
606 {
607 isotopic_data_handler_up =
608 std::make_unique<IsotopicDataManualConfigHandler>(msp_isotopicData);
609 }
610 else
611 qFatal("Programming error. The isotopic data type is not correct.");
612
613
614 // At this point we need to create the arrays exactly as we do in the user
615 // manual config. So we need to know how many different chemical element
616 // symbols we have in the formula.
617
618 std::map<QString, double> symbol_double_count_map =
619 formula.getSymbolCountMapCstRef();
620
621 std::size_t element_count = symbol_double_count_map.size();
622
623 // qDebug() << "Number of different symbols in the formula:" << element_count;
624
625 if(!element_count)
626 {
627 qDebug() << "There is not a single element in the Formula.";
628 return std::pair(std::make_shared<const pappso::Trace>(), 0);
629 }
630
631 // qDebug() << "The validated formula has a symbol/count map size:"
632 //<< element_count;
633
634 // We have to copy the symbol/count map obtained by validating
635 // the formula into the isotopic data handler. That map is essential for the
636 // crafting by the handler of the different IsoSpec arrays.
637
638 // At this point, all the elements defined by the user have been completed and
639 // we'll have to create the static arrays that are needed by IsoSpec.
640
641 // We now need to construct the C arrays for IsoSpec. The arrays need to
642 // be filled-in very accurately.
643
644 // This array lists the number of isotopes that each element has.
645 // Typically, C has 2, O has 3, P has 1...
646 int *per_element_isotopes_count_array_p = nullptr;
647
648 // This array lists the count of atoms of each element.
649 // Typically H20 will have 2 for H and 1 for O.
650 int *per_element_symbol_count_array_p = nullptr;
651
652 // These are arrays of arrays! Each array contains a new sub-array for each
653 // symbol. The sub-array contains the isotopic masses (or probs) for one of
654 // the element symbols. The sub-arrays are allocated by the handler below.
655 double **per_element_isotope_masses_arrays_p_p = nullptr;
656 double **per_element_isotope_probs_arrays_p_p = nullptr;
657
658 // The isotopic cluster calculations do not understand
659 // formulas with double counts for the symbols!!
660 // We thus need to convert the symbol_count_map that is
661 // <QString,double>-based to a map that has its second
662 // member, not of double type but of int type.
663
664 std::map<QString, int> symbol_int_count_map;
665 std::map<QString, double>::const_iterator iter =
666 symbol_double_count_map.begin();
667
668 while(iter != symbol_double_count_map.end())
669 {
670 symbol_int_count_map[iter->first] = static_cast<int>(iter->second);
671
672 // qDebug() << "Old " << iter->first << "double version:" << iter->second
673 // << "new int version:" << symbol_int_count_map[iter->first];
674
675 ++iter;
676 }
677
678 // We pass the array pointers by reference.
679 if(!configureIsotopicData(symbol_int_count_map,
680 per_element_isotopes_count_array_p,
681 per_element_symbol_count_array_p,
682 per_element_isotope_masses_arrays_p_p,
683 per_element_isotope_probs_arrays_p_p))
684 {
685 qDebug() << "Failed to actually prepare the isotopic data tables for the "
686 "computation.";
687
688 return std::pair(std::make_shared<const pappso::Trace>(), 0);
689 }
690
691 // At this point we have all the arrays needed to work.
692
693 pappso::TraceSPtr isotopic_cluster_sp =
694 runIsotopicDataCalculations(element_count,
695 formula_charge_pair.second,
696 per_element_isotopes_count_array_p,
697 per_element_symbol_count_array_p,
698 per_element_isotope_masses_arrays_p_p,
699 per_element_isotope_probs_arrays_p_p);
700
701 // FIXME
702 // To avoid a memory leak, we need to delete the mass and prob heap-allocated
703 // arrays.
704
705 // delete[] per_element_isotopes_count_array_p;
706 // delete[] per_element_symbol_count_array_p;
707
708 // for(std::size_t iter = 0; iter < element_count; ++iter)
709 //{
710 // delete[] per_element_isotope_masses_arrays_p_p[iter];
711 // delete[] per_element_isotope_probs_arrays_p_p[iter];
712 //}
713
714 if(isotopic_cluster_sp == nullptr)
715 qDebug() << "Failed to compute an isotopic cluster for formula:"
716 << formula_charge_pair.first;
717
718 // Just a debug check.
719 // qDebug() << "After normalizing, first data point:"
720 //<< isotopic_cluster_sp->front().toString();
721
722 // qDebug() << "Done generating cluster for formula:"
723 //<< formula_charge_pair.first;
724
725 return std::pair(isotopic_cluster_sp, formula_charge_pair.second);
726 }
727
728
729 /*!
730 \brief Configures the isotopic data in a set of arrays for the (symbol,count)
731 pairs in \a symbol_count_map.
732
733 \list
734
735 \li \a per_element_isotopes_count_array_p: pointer to int array. This array
736 lists
737 the number of isotopes that each element has. Typically, C has 2, O has 3, P has
738 1...
739
740 \li \a per_element_symbol_count_array_p: pointer to int array. This array lists
741 the count of atoms of each element. Typically H20 will have 2 for H and 1 for O.
742
743 \li \a per_element_isotope_masses_arrays_p_p: pointer to pointer to double array
744 (array of arrays of double). Each array contains a new sub-array for each
745 symbol. The sub-array contains the isotopic masses for one of the element
746 symbols.
747
748 \li \a per_element_isotope_probs_arrays_p_p: pointer to pointer to double array
749 (array of arrays of double). Each array contains a new sub-array for each
750 symbol. The sub-array contains the isotopic probs for one of the element
751 symbols.
752
753 \endlist
754
755 Returns a pappso::Trace with the calculated isotopic cluster.
756 */
757 bool
758 IsotopicClusterGenerator::configureIsotopicData(
759 std::map<QString, int> &symbol_count_map,
760 int *&per_element_isotopes_count_array_p,
761 int *&per_element_symbol_count_array_p,
762 double **&per_element_isotope_masses_arrays_p_p,
763 double **&per_element_isotope_probs_arrays_p_p)
764 {
765 // Start by allocating the arrays we'll have to feed in this configuration
766 // work.
767
768 // However, we need to ensure that we actually have some stuff to work on.
769 // That stuff is kind of a formula in the form of the std::map<QString, int>
770 // m_symbolCountMap that pairs the symbols of the atoms in the formula and
771 // the count for each symbol.
772
773 if(!symbol_count_map.size())
774 return false;
775
776 // qDebug() << "The isotopic data have" << msp_isotopicData->size()
777 //<< "isotopes";
778
779 // Example used in the comments below: glucose, C6H12O6.
780
781 // How many isotopes of each element symbols are there?
782 // C:2, H:2, O:3
783 per_element_isotopes_count_array_p = new int[symbol_count_map.size()];
784
785 // How many atoms of each chemical element symbol are there?
786 // C:6, H:12, O:6
787 per_element_symbol_count_array_p = new int[symbol_count_map.size()];
788
789 // Each subarray contains the mass of one of the isotopes for the element
790 // symbol.
791 // First array for C (two masses), second array for H (two masses), third
792 // array for O (three masses).
793 per_element_isotope_masses_arrays_p_p = new double *[symbol_count_map.size()];
794
795 // Each subarray contains the prob of one of the isotopes for the element
796 // symbol.
797 // First array for C (two probs), second array for H (two probs), third
798 // array for O (three probs).
799 per_element_isotope_probs_arrays_p_p = new double *[symbol_count_map.size()];
800
801 // Index that will allow to address the right slot in the arrays being
802 // filled with isotopic data.
803 int current_symbol_index = 0;
804
805 for(auto item : symbol_count_map)
806 {
807 QString symbol = item.first;
808 int symbol_count = item.second;
809
810 // qDebug() << "Iterating in symbol/count:" << symbol << "/" <<
811 // symbol_count;
812
813 // Immediately fill-in the symbol count value.
814 per_element_symbol_count_array_p[current_symbol_index] = symbol_count;
815
816 // Now get iterator bounding the isotopes for this symbol.
817
818 std::pair<QList<IsotopeQSPtr>::const_iterator,
819 QList<IsotopeQSPtr>::const_iterator>
820 iter_pair = msp_isotopicData->getIsotopesBySymbol(symbol);
821
822 // Handy shortcuts
823 QList<IsotopeQSPtr>::const_iterator iter = iter_pair.first;
824 QList<IsotopeQSPtr>::const_iterator iter_end = iter_pair.second;
825
826 qsizetype isotope_count = std::distance(iter, iter_end);
827
828 // qDebug() << "For symbol:" << symbol << "there are:" << isotope_count
829 //<< "isotopes";
830
831 // Sanity check
832 if(isotope_count != msp_isotopicData->getIsotopeCountBySymbol(symbol))
833 qFatal(
834 "Programming error. The is something wrong with the isotopic "
835 "data.");
836
837 // Fill-in the isotope count for the current symbol.
838 per_element_isotopes_count_array_p[current_symbol_index] = isotope_count;
839
840 // Fill-in the isotopes (mass/prob). For each element symbol in the
841 // symbol/count map, we allocate a double array the size of the number
842 // of isotopes so as to store the mass of each isotope (same for the
843 // probs later). Once we have done that fill-in, we can set the address
844 // of the array of the array of array below.
845
846 // Allocate a double array for the masses and another one for the probs.
847
848 double *masses_p = new double[isotope_count];
849 double *probs_p = new double[isotope_count];
850
851 int current_isotope_index = 0;
852
853 while(iter != iter_end)
854 {
855 IsotopeQSPtr isotope_qsp = *iter;
856
857 // qDebug() << "For symbol" << symbol << "iterating with index"
858 //<< current_isotope_index << "with Isotope *"
859 //<< isotope_qsp.get();
860
861 masses_p[current_isotope_index] = isotope_qsp->getMass();
862 probs_p[current_isotope_index] = isotope_qsp->getProbability();
863
864 // Increment the iterator
865 ++iter;
866 // Increment the isotope index so that we fill next array cell.
867 ++current_isotope_index;
868 }
869
870 // At this time the masses and probs for the isotopes of current symbol
871 // have been filled-in.
872
873 per_element_isotope_masses_arrays_p_p[current_symbol_index] = masses_p;
874 per_element_isotope_probs_arrays_p_p[current_symbol_index] = probs_p;
875
876 // We need to increment this index so as to address the right slot in
877 // the arrays!
878 ++current_symbol_index;
879 }
880
881 // At this point we have documented all the isotopic data required to
882 // perform a calculation with IsoSpec.
883
884 return true;
885 }
886
887 /*!
888 \brief Normalizes the intensities of the isotopic cluster's peak centroids in
889 \a isotopic_cluster_sp.
890
891 If normalization is asked for, the most intense peak centroid in \a
892 isotopic_cluster_sp is determined. That intensity becomes the
893 m_normalizeIntensity value and all the other peak centroids' intensities are
894 normalized.
895
896 \note The normalization occurs \e{in place}.
897 */
898 void
899 IsotopicClusterGenerator::normalizeIntensities(
900 pappso::TraceSPtr &isotopic_cluster_sp)
901 {
902
903 // qDebug() << "The normalize_intensity is:" << m_normalizeIntensity;
904
905 // The most intense peak centroid's intensity value needs to be set to the
906 // requested value and the same change ratio needs to be applied to all the
907 // other peak centroids.
908
909 // No normalization is asked for.
910 if(m_normalizeIntensity == std::numeric_limits<int>::min())
911 {
912 // qDebug() << "No normalization was asked for. Skipping.";
913 return;
914 }
915
916 if(!isotopic_cluster_sp->size())
917 {
918 qDebug() << "The isotopic cluster has not a single data point.";
919 return;
920 }
921 // First get the most intense centroid.
922
923 double max_found_intensity = 0;
924
925 // qDebug() << "isotopic_cluster_sp->size():" << isotopic_cluster_sp->size();
926
927 pappso::Trace::iterator vector_iterator = std::max_element(
928 isotopic_cluster_sp->begin(),
929 isotopic_cluster_sp->end(),
930 [](const pappso::DataPoint first, const pappso::DataPoint second) {
931 return first.y < second.y;
932 });
933
934 if(vector_iterator == isotopic_cluster_sp->end())
935 qFatal("Programming error");
936
937 max_found_intensity = (*vector_iterator).y;
938
939 if(!max_found_intensity)
940 qFatal("The maximum intensity of the whole isotopic cluster is 0.");
941
942 // qDebug().noquote() << "Peak centroid with maximum intensity: "
943 //<< vector_iterator->toString();
944
945 // Calculate the ratio between the final requested intensity and the greatest
946 // intensity. That ratio will be multiplied to intensity of the each peak
947 // centroid, which is why we call it a factor.
948
949 double intensity_factor = m_normalizeIntensity / max_found_intensity;
950
951 // qDebug() << "Will multiply this intensity factor to each data point's "
952 //"intensity value:"
953 //<< intensity_factor;
954
955 // Just a debug check.
956 // qDebug() << "Before normalizing, first data point:"
957 //<< isotopic_cluster_sp->front().toString();
958
959 std::for_each(
960 isotopic_cluster_sp->begin(),
961 isotopic_cluster_sp->end(),
962 [intensity_factor](pappso::DataPoint &data_point) // modify in-place
963 {
964 // qDebug() << "Before normalization:" <<
965 // data_point.toString();
966
967 double normalized_intensity = data_point.y * intensity_factor;
968
969 data_point.y = normalized_intensity;
970
971 // qDebug() << "After normalization:" <<
972 // data_point.toString();
973 });
974
975 // At this point the centroids' intensity have been normalized.
976
977 // Just a debug check.
978 // qDebug() << "After normalizing, first data point:"
979 //<< isotopic_cluster_sp->front().toString();
980 }
981
982 /*!
983 \brief Sorts the peak centroids of the isotopic cluster \a isotopic_cluster_sp.
984
985 The sort is performed according to \l m_sortType.
986 */
987 void
988 IsotopicClusterGenerator::sortPeakCentroids(
989 pappso::TraceSPtr &isotopic_cluster_sp)
990 {
991 if(m_sortType == pappso::Enums::SortType::none)
992 return;
993
994 return isotopic_cluster_sp->sort(m_sortType, m_sortOrder);
995 }
996
997 /*!
998 \brief Returns a string containing a space-separated set of m/z, intensity
999 pairs, representing the isotopic cluster in \a isotopic_cluster_sp.
1000 */
1001 QString
1002 IsotopicClusterGenerator::clusterToString(
1003 const pappso::TraceCstSPtr &isotopic_cluster_sp) const
1004 {
1005
1006 // Export the results as a string. Note how we do export the relative
1007 // intensity. If there was normalization, that value was updated, otherwise it
1008 // had been initialized identical to the intensity upon creation of the
1009 // PeakCentroid instances in the vector.
1010
1011 QString text;
1012
1013 for(const pappso::DataPoint &dp : *isotopic_cluster_sp)
1014 {
1015 text += QString("%1 %2\n").arg(dp.x, 0, 'f', 30).arg(dp.y, 0, 'f', 30);
1016 }
1017
1018 // qDebug().noquote() << "Cluster to string: " << text;
1019
1020 return text;
1021 }
1022
1023
1024 /*!
1025 \brief Returns a string containing a space-separated set of m/z, intensity
1026 pairs, representing the isotopic clusters in the member isotopic clusters \l
1027 m_isotopicClusterChargePairs.
1028 */
1029 QString
1030 IsotopicClusterGenerator::clustersToString() const
1031 {
1032
1033 // Export the results as a string. Note how we do export the relative
1034 // intensity. If there was normalization, that value was updated, otherwise it
1035 // had been initialized identical to the intensity upon creation of the
1036 // PeakCentroid instances in the vector.
1037
1038 QString text;
1039
1040 for(auto isotopic_cluster_charge_pair : m_isotopicClusterChargePairs)
1041 {
1042 text += clusterToString(isotopic_cluster_charge_pair.first);
1043 }
1044
1045 // qDebug().noquote() << text;
1046
1047 return text;
1048 }
1049
1050 /*!
1051 \brief Starts the computations.
1052
1053 The member m_isotopicClusterChargePairs are first cleared.
1054
1055 Returns the count of IsotopicClusterChargePair instances generated upon the
1056 calculations.
1057 */
1058 std::size_t
1059 IsotopicClusterGenerator::run()
1060 {
1061 // Iterate in all the formula/charge pairs and for each compute an isotopic
1062 // cluster.
1063
1064 m_isotopicClusterChargePairs.clear();
1065
1066 for(auto formula_charge_pair : m_formulaChargePairs)
1067 {
1068 IsotopicClusterChargePair pair =
1069 generateIsotopicClusterCentroids(formula_charge_pair);
1070
1071 if(!pair.first->size())
1072 {
1073 qFatal("The isotopic cluster is empty for formula: %s",
1074 formula_charge_pair.first.toLatin1().data());
1075 }
1076
1077 m_isotopicClusterChargePairs.push_back(pair);
1078 }
1079
1080 return m_isotopicClusterChargePairs.size();
1081 }
1082
1083 /*!
1084 \brief Returns the member list ofIsotopicClusterChargePair instances.
1085 */
1086 const std::vector<IsotopicClusterChargePair> &
1087 IsotopicClusterGenerator::getIsotopicClusterChargePairs() const
1088 {
1089 return m_isotopicClusterChargePairs;
1090 }
1091
1092
1093 } // namespace libXpertMassCore
1094 } // namespace MsXpS
1095