GCC Code Coverage Report


source/XpertMassCore/src/
File: source/XpertMassCore/src/PkaPhPiDataParser.cpp
Date: 2025-11-20 01:41:33
Lines:
0/106
0.0%
Functions:
0/5
0.0%
Branches:
0/238
0.0%

Line Branch Exec Source
1 /* BEGIN software license
2 *
3 * MsXpertSuite - mass spectrometry software suite
4 * -----------------------------------------------
5 * Copyright(C) 2009,...,2018 Filippo Rusconi
6 *
7 * http://www.msxpertsuite.org
8 *
9 * This file is part of the MsXpertSuite project.
10 *
11 * The MsXpertSuite project is the successor of the massXpert project. This
12 * project now includes various independent modules:
13 *
14 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
15 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16 *
17 * This program is free software: you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation, either version 3 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program. If not, see <http://www.gnu.org/licenses/>.
29 *
30 * END software license
31 */
32
33 #include <QFile>
34 #include <QIODevice>
35
36
37 /////////////////////// Local includes
38 #include "MsXpS/libXpertMassCore/PkaPhPiDataParser.hpp"
39 #include "MsXpS/libXpertMassCore/ChemicalGroup.hpp"
40
41
42 namespace MsXpS
43 {
44 namespace libXpertMassCore
45 {
46
47
48 /*!
49 \class MsXpS::libXpertMassCore::PkaPhPiDataParser
50 \inmodule libXpertMassCore
51 \ingroup PolChemDefBuildingdBlocks
52 \inheaderfile PkaPhPiDataParser.hpp
53
54 \brief The PkaPhPiDataParser class provides a file reader for the pKa, pH,
55 pI data XML file.
56
57 The format is the following:
58
59 \code
60 <pkaphpidata>
61 <monomers>
62 <monomer>
63 <code>A</code>
64 <mnmchemgroup>
65 <name>N-term NH2</name>
66 <pka>9.6</pka>
67 <acidcharged>TRUE</acidcharged>
68 <polrule>left_trapped</polrule>
69 <chemgrouprule>
70 <entity>LE_PLM_MODIF</entity>
71 <name>Acetylation</name>
72 <outcome>LOST</outcome>
73 </chemgrouprule>
74 </mnmchemgroup>
75 <mnmchemgroup>
76 <name>C-term COOH</name>
77 <pka>2.35</pka>
78 <acidcharged>FALSE</acidcharged>
79 <polrule>right_trapped</polrule>
80 </mnmchemgroup>
81 </monomer>
82 [...]
83 <monomer>
84 <code>C</code>
85 <mnmchemgroup>
86 <name>N-term NH2</name>
87 <pka>9.6</pka>
88 <acidcharged>TRUE</acidcharged>
89 <polrule>left_trapped</polrule>
90 <chemgrouprule>
91 <entity>LE_PLM_MODIF</entity>
92 <name>Acetylation</name>
93 <outcome>LOST</outcome>
94 </chemgrouprule>
95 </mnmchemgroup>
96 <mnmchemgroup>
97 <name>C-term COOH</name>
98 <pka>2.35</pka>
99 <acidcharged>FALSE</acidcharged>
100 <polrule>right_trapped</polrule>
101 </mnmchemgroup>
102 <mnmchemgroup>
103 <name>Lateral SH2</name>
104 <pka>8.3</pka>
105 <acidcharged>FALSE</acidcharged>
106 <polrule>never_trapped</polrule>
107 </mnmchemgroup>
108 </monomer>
109 </monomers>
110 <modifs>
111 <modif>
112 <name>Phosphorylation</name>
113 <mdfchemgroup>
114 <name>none_set</name>
115 <pka>1.2</pka>
116 <acidcharged>FALSE</acidcharged>
117 </mdfchemgroup>
118 <mdfchemgroup>
119 <name>none_set</name>
120 <pka>6.5</pka>
121 <acidcharged>FALSE</acidcharged>
122 </mdfchemgroup>
123 </modif>
124 </modifs>
125 </pkaphpidata>
126 \endcode
127 */
128
129
130 /*!
131 \variable MsXpS::libXpertMassCore::PkaPhPiDataParser::mcsp_polChemDef
132
133 \brief The polymer chemistry definition context in which this PkaPhPiDataParser
134 instance exists.
135 */
136
137 /*!
138 \variable MsXpS::libXpertMassCore::PkaPhPiDataParser::m_filePath
139
140 \brief The path of the file that contains all the specifications for chemical
141 groups and chemical rules.
142 */
143
144 /*!
145 \brief Constructs a PkaPhPiDataParser instance.
146
147 \list
148 \li \a pol_chem_def_csp: The polymer chemistry definition (cannot be nullptr).
149
150 \li \a file_path: the specification file path.
151 \endlist
152 */
153 PkaPhPiDataParser::PkaPhPiDataParser(const PolChemDefCstSPtr &pol_chem_def_csp,
154 const QString &file_path)
155 : mcsp_polChemDef(pol_chem_def_csp), m_filePath(file_path)
156 {
157 if(mcsp_polChemDef == nullptr || mcsp_polChemDef.get() == nullptr)
158 qFatalStream() << "Programming error. Cannot be that pointer is nullptr.";
159 }
160
161 /*!
162 \brief Destructs this PkaPhPiDataParser instance
163 */
164 PkaPhPiDataParser::~PkaPhPiDataParser()
165 {
166 }
167
168 /*!
169 \brief Sets the \a file_path.
170 */
171 void
172 PkaPhPiDataParser::setFilePath(const QString &file_path)
173 {
174 m_filePath = file_path;
175 }
176
177 /*!
178 \brief Returns the file path.
179 */
180 const QString &
181 PkaPhPiDataParser::filePath()
182 {
183 return m_filePath;
184 }
185
186 /*!
187 \brief Parses the file and fills-in the \a monomers and the \a modifs containers.
188
189 The two container hold specific Monomer and Modif instances as desribed in the loaded file.
190
191 Returns true upon success, false otherwise.
192 */
193 bool
194 PkaPhPiDataParser::renderXmlFile(std::vector<MonomerSPtr> &monomers,
195 std::vector<ModifSPtr> &modifs)
196 {
197 // <pkaphpidata>
198 // <monomers>
199 // <monomer>
200 // <code>A</code>
201 // <mnmchemgroup>
202 // <name>N-term NH2</name>
203 // <pka>9.6</pka>
204 // <acidcharged>TRUE</acidcharged>
205 // <polrule>left_trapped</polrule>
206 // <chemgrouprule>
207 // <entity>LE_PLM_MODIF</entity>
208 // <name>Acetylation</name>
209 // <outcome>LOST</outcome>
210 // </chemgrouprule>
211 // </mnmchemgroup>
212 // <mnmchemgroup>
213 // <name>C-term COOH</name>
214 // <pka>2.35</pka>
215 // <acidcharged>FALSE</acidcharged>
216 // <polrule>right_trapped</polrule>
217 // </mnmchemgroup>
218 // </monomer>
219 // [...]
220 // <monomer>
221 // <code>C</code>
222 // <mnmchemgroup>
223 // <name>N-term NH2</name>
224 // <pka>9.6</pka>
225 // <acidcharged>TRUE</acidcharged>
226 // <polrule>left_trapped</polrule>
227 // <chemgrouprule>
228 // <entity>LE_PLM_MODIF</entity>
229 // <name>Acetylation</name>
230 // <outcome>LOST</outcome>
231 // </chemgrouprule>
232 // </mnmchemgroup>
233 // <mnmchemgroup>
234 // <name>C-term COOH</name>
235 // <pka>2.35</pka>
236 // <acidcharged>FALSE</acidcharged>
237 // <polrule>right_trapped</polrule>
238 // </mnmchemgroup>
239 // <mnmchemgroup>
240 // <name>Lateral SH2</name>
241 // <pka>8.3</pka>
242 // <acidcharged>FALSE</acidcharged>
243 // <polrule>never_trapped</polrule>
244 // </mnmchemgroup>
245 // </monomer>
246 // </monomers>
247 // <modifs>
248 // <modif>
249 // <name>Phosphorylation</name>
250 // <mdfchemgroup>
251 // <name>none_set</name>
252 // <pka>1.2</pka>
253 // <acidcharged>FALSE</acidcharged>
254 // </mdfchemgroup>
255 // <mdfchemgroup>
256 // <name>none_set</name>
257 // <pka>6.5</pka>
258 // <acidcharged>FALSE</acidcharged>
259 // </mdfchemgroup>
260 // </modif>
261 // </modifs>
262 // </pkaphpidata>
263 //
264 // The DTD stipulates that:
265 //
266 // <!ELEMENT pkaphpidata(monomers,modifs*)>
267 // <!ELEMENT monomers(monomer*)>
268 // <!ELEMENT modifs(modif*)>
269 // <!ELEMENT monomer(code,mnmchemgroup*)>
270 // <!ELEMENT modif(name,mdfchemgroup*)>
271
272 QDomDocument doc("pkaPhPiData");
273 QDomElement element;
274 QDomElement child;
275 QDomElement indentedChild;
276
277 QFile file(m_filePath);
278
279 if(!file.open(QIODevice::ReadOnly))
280 return false;
281
282 if(!doc.setContent(&file))
283 {
284 file.close();
285 return false;
286 }
287
288 file.close();
289
290 element = doc.documentElement();
291
292 if(element.tagName() != "pkaphpidata")
293 {
294 qDebug() << __FILE__ << __LINE__ << "pKa-pH-pI data file is erroneous\n";
295 return false;
296 }
297
298 // The first child element must be <monomers>.
299
300 child = element.firstChildElement();
301 if(child.tagName() != "monomers")
302 {
303 qCritical() << "The PkaPhPi data file could not be parsed: the "
304 "<monomers> element was not found.";
305 return false;
306 }
307
308 // Parse the <monomer> elements.
309
310 indentedChild = child.firstChildElement();
311 while(!indentedChild.isNull())
312 {
313 if(indentedChild.tagName() != "monomer")
314 return false;
315
316 QDomElement superIndentedElement = indentedChild.firstChildElement();
317
318 if(superIndentedElement.tagName() != "code")
319 {
320 qCritical() << "The PkaPhPi data file could not be parsed: the "
321 "<code> element was not found.";
322 return false;
323 }
324
325 MonomerSPtr monomer_sp =
326 std::make_shared<Monomer>(mcsp_polChemDef,
327 /*name*/ "",
328 superIndentedElement.text(),
329 /*formula string*/ "",
330 0.0,
331 0.0);
332
333 // All the <mnmchemgroup> elements, if any.
334
335 superIndentedElement = superIndentedElement.nextSiblingElement();
336
337 while(!superIndentedElement.isNull())
338 {
339 if(superIndentedElement.tagName() != "mnmchemgroup")
340 {
341 qCritical() << "The PkaPhPi data file could not be parsed: the "
342 "<mnmchemgroup> element was not found.";
343
344 monomer_sp.reset();
345 return false;
346 }
347
348 ChemicalGroup *chemGroup = new ChemicalGroup("NOT_SET");
349
350 if(!chemGroup->renderXmlMnmElement(superIndentedElement))
351 {
352 qCritical() << "The PkaPhPi data file could not be parsed: the "
353 "<mnmchemgroup> element failed to render.";
354
355 monomer_sp.reset();
356 delete chemGroup;
357 return false;
358 }
359
360 ChemicalGroupProp *prop =
361 new ChemicalGroupProp("CHEMICAL_GROUP", chemGroup);
362
363 monomer_sp->appendProp(prop);
364
365 superIndentedElement = superIndentedElement.nextSiblingElement();
366 }
367
368 monomers.push_back(monomer_sp);
369
370 indentedChild = indentedChild.nextSiblingElement();
371 }
372
373 #if 0
374
375 qDebug() << __FILE__ << __LINE__
376 << "Debug output of all the monomers parsed:";
377
378 for (int iter = 0; iter < monomerList->size(); ++iter)
379 {
380 Monomer *monomer = monomerList->at(iter);
381 qDebug() << __FILE__ << __LINE__
382 << "Monomer:" << monomer->name();
383
384 for(int jter = 0; jter < monomer->propList()->size(); ++jter)
385 {
386 Prop *prop = monomer->propList()->at(jter);
387
388 if (prop->name() == "CHEMICAL_GROUP")
389 {
390 const ChemicalGroup *chemGroup =
391 static_cast<const ChemicalGroup *>(prop->data());
392
393 qDebug() << __FILE__ << __LINE__
394 << "Chemical group:"
395 << chemGroup->name() << chemGroup->pka();
396 }
397 }
398 }
399
400 #endif
401
402 // And now parse the <modifs> elements, if any, this time, as
403 // this element is not compulsory.
404
405 child = child.nextSiblingElement();
406 if(child.isNull())
407 return true;
408
409 if(child.tagName() != "modifs")
410 {
411 qCritical() << "The PkaPhPi data file could not be parsed: the "
412 "<modifs> element was not found.";
413 return false;
414 }
415
416 // Parse the <modif> elements.
417
418 indentedChild = child.firstChildElement();
419 while(!indentedChild.isNull())
420 {
421 if(indentedChild.tagName() != "modif")
422 {
423 qCritical() << "The PkaPhPi data file could not be parsed: the "
424 "<modif> element was not found.";
425 return false;
426 }
427
428 QDomElement superIndentedElement = indentedChild.firstChildElement();
429
430 if(superIndentedElement.tagName() != "name")
431 {
432 qCritical() << "The PkaPhPi data file could not be parsed: the "
433 "<name> element was not found.";
434 return false;
435 }
436
437 ModifSPtr modif_sp = std::make_shared<Modif>(
438 mcsp_polChemDef, superIndentedElement.text(), "H0");
439
440 // All the <mdfchemgroup> elements, if any.
441
442 superIndentedElement = superIndentedElement.nextSiblingElement();
443 while(!superIndentedElement.isNull())
444 {
445 if(superIndentedElement.tagName() != "mdfchemgroup")
446 {
447 qCritical() << "The PkaPhPi data file could not be parsed: the "
448 "<mdfchemgroup> element was not found.";
449 modif_sp.reset();
450 return false;
451 }
452
453 ChemicalGroup *chemGroup = new ChemicalGroup("NOT_SET");
454
455 if(!chemGroup->renderXmlMdfElement(superIndentedElement))
456 {
457 qCritical() << "The PkaPhPi data file could not be parsed: the "
458 "<mdfchemgroup> element failed to render.";
459 modif_sp.reset();
460 delete chemGroup;
461 return false;
462 }
463
464 ChemicalGroupProp *prop =
465 new ChemicalGroupProp("CHEMICAL_GROUP", chemGroup);
466
467 modif_sp->appendProp(prop);
468
469 superIndentedElement = superIndentedElement.nextSiblingElement();
470 }
471
472 modifs.push_back(modif_sp);
473
474 indentedChild = indentedChild.nextSiblingElement();
475 }
476
477 #if 0
478
479 qDebug() << __FILE__ << __LINE__
480 << "Debug output of all the modifs parsed:";
481
482 for (int iter = 0; iter < modifList->size(); ++iter)
483 {
484 Modif *modif = modifList->at(iter);
485
486 // qDebug() << __FILE__ << __LINE__
487 // << "Modif:" << modif->name();
488
489 for(int jter = 0; jter < modif->propList()->size(); ++jter)
490 {
491 Prop *prop = modif->propList()->at(jter);
492
493 if (prop->name() == "CHEMICAL_GROUP")
494 {
495 const ChemicalGroup *chemGroup =
496 static_cast<const ChemicalGroup *>(prop->data());
497
498 qDebug() << __FILE__ << __LINE__
499 << "Chemical group:"
500 << chemGroup->name() << chemGroup->pka();
501 }
502 }
503 }
504
505 #endif
506
507 return true;
508 }
509
510 } // namespace libXpertMassCore
511
512 } // namespace MsXpS
513