| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* BEGIN software license | ||
| 2 | * | ||
| 3 | * MsXpertSuite - mass spectrometry software suite | ||
| 4 | * ----------------------------------------------- | ||
| 5 | * Copyright(C) 2009,...,2018 Filippo Rusconi | ||
| 6 | * | ||
| 7 | * http://www.msxpertsuite.org | ||
| 8 | * | ||
| 9 | * This file is part of the MsXpertSuite project. | ||
| 10 | * | ||
| 11 | * The MsXpertSuite project is the successor of the massXpert project. This | ||
| 12 | * project now includes various independent modules: | ||
| 13 | * | ||
| 14 | * - massXpert, model polymer chemistries and simulate mass spectrometric data; | ||
| 15 | * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner; | ||
| 16 | * | ||
| 17 | * This program is free software: you can redistribute it and/or modify | ||
| 18 | * it under the terms of the GNU General Public License as published by | ||
| 19 | * the Free Software Foundation, either version 3 of the License, or | ||
| 20 | * (at your option) any later version. | ||
| 21 | * | ||
| 22 | * This program is distributed in the hope that it will be useful, | ||
| 23 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 24 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 25 | * GNU General Public License for more details. | ||
| 26 | * | ||
| 27 | * You should have received a copy of the GNU General Public License | ||
| 28 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 29 | * | ||
| 30 | * END software license | ||
| 31 | */ | ||
| 32 | |||
| 33 | #include <QFile> | ||
| 34 | #include <QIODevice> | ||
| 35 | |||
| 36 | |||
| 37 | /////////////////////// Local includes | ||
| 38 | #include "MsXpS/libXpertMassCore/PkaPhPiDataParser.hpp" | ||
| 39 | #include "MsXpS/libXpertMassCore/ChemicalGroup.hpp" | ||
| 40 | |||
| 41 | |||
| 42 | namespace MsXpS | ||
| 43 | { | ||
| 44 | namespace libXpertMassCore | ||
| 45 | { | ||
| 46 | |||
| 47 | |||
| 48 | /*! | ||
| 49 | \class MsXpS::libXpertMassCore::PkaPhPiDataParser | ||
| 50 | \inmodule libXpertMassCore | ||
| 51 | \ingroup PolChemDefBuildingdBlocks | ||
| 52 | \inheaderfile PkaPhPiDataParser.hpp | ||
| 53 | |||
| 54 | \brief The PkaPhPiDataParser class provides a file reader for the pKa, pH, | ||
| 55 | pI data XML file. | ||
| 56 | |||
| 57 | The format is the following: | ||
| 58 | |||
| 59 | \code | ||
| 60 | <pkaphpidata> | ||
| 61 | <monomers> | ||
| 62 | <monomer> | ||
| 63 | <code>A</code> | ||
| 64 | <mnmchemgroup> | ||
| 65 | <name>N-term NH2</name> | ||
| 66 | <pka>9.6</pka> | ||
| 67 | <acidcharged>TRUE</acidcharged> | ||
| 68 | <polrule>left_trapped</polrule> | ||
| 69 | <chemgrouprule> | ||
| 70 | <entity>LE_PLM_MODIF</entity> | ||
| 71 | <name>Acetylation</name> | ||
| 72 | <outcome>LOST</outcome> | ||
| 73 | </chemgrouprule> | ||
| 74 | </mnmchemgroup> | ||
| 75 | <mnmchemgroup> | ||
| 76 | <name>C-term COOH</name> | ||
| 77 | <pka>2.35</pka> | ||
| 78 | <acidcharged>FALSE</acidcharged> | ||
| 79 | <polrule>right_trapped</polrule> | ||
| 80 | </mnmchemgroup> | ||
| 81 | </monomer> | ||
| 82 | [...] | ||
| 83 | <monomer> | ||
| 84 | <code>C</code> | ||
| 85 | <mnmchemgroup> | ||
| 86 | <name>N-term NH2</name> | ||
| 87 | <pka>9.6</pka> | ||
| 88 | <acidcharged>TRUE</acidcharged> | ||
| 89 | <polrule>left_trapped</polrule> | ||
| 90 | <chemgrouprule> | ||
| 91 | <entity>LE_PLM_MODIF</entity> | ||
| 92 | <name>Acetylation</name> | ||
| 93 | <outcome>LOST</outcome> | ||
| 94 | </chemgrouprule> | ||
| 95 | </mnmchemgroup> | ||
| 96 | <mnmchemgroup> | ||
| 97 | <name>C-term COOH</name> | ||
| 98 | <pka>2.35</pka> | ||
| 99 | <acidcharged>FALSE</acidcharged> | ||
| 100 | <polrule>right_trapped</polrule> | ||
| 101 | </mnmchemgroup> | ||
| 102 | <mnmchemgroup> | ||
| 103 | <name>Lateral SH2</name> | ||
| 104 | <pka>8.3</pka> | ||
| 105 | <acidcharged>FALSE</acidcharged> | ||
| 106 | <polrule>never_trapped</polrule> | ||
| 107 | </mnmchemgroup> | ||
| 108 | </monomer> | ||
| 109 | </monomers> | ||
| 110 | <modifs> | ||
| 111 | <modif> | ||
| 112 | <name>Phosphorylation</name> | ||
| 113 | <mdfchemgroup> | ||
| 114 | <name>none_set</name> | ||
| 115 | <pka>1.2</pka> | ||
| 116 | <acidcharged>FALSE</acidcharged> | ||
| 117 | </mdfchemgroup> | ||
| 118 | <mdfchemgroup> | ||
| 119 | <name>none_set</name> | ||
| 120 | <pka>6.5</pka> | ||
| 121 | <acidcharged>FALSE</acidcharged> | ||
| 122 | </mdfchemgroup> | ||
| 123 | </modif> | ||
| 124 | </modifs> | ||
| 125 | </pkaphpidata> | ||
| 126 | \endcode | ||
| 127 | */ | ||
| 128 | |||
| 129 | |||
| 130 | /*! | ||
| 131 | \variable MsXpS::libXpertMassCore::PkaPhPiDataParser::mcsp_polChemDef | ||
| 132 | |||
| 133 | \brief The polymer chemistry definition context in which this PkaPhPiDataParser | ||
| 134 | instance exists. | ||
| 135 | */ | ||
| 136 | |||
| 137 | /*! | ||
| 138 | \variable MsXpS::libXpertMassCore::PkaPhPiDataParser::m_filePath | ||
| 139 | |||
| 140 | \brief The path of the file that contains all the specifications for chemical | ||
| 141 | groups and chemical rules. | ||
| 142 | */ | ||
| 143 | |||
| 144 | /*! | ||
| 145 | \brief Constructs a PkaPhPiDataParser instance. | ||
| 146 | |||
| 147 | \list | ||
| 148 | \li \a pol_chem_def_csp: The polymer chemistry definition (cannot be nullptr). | ||
| 149 | |||
| 150 | \li \a file_path: the specification file path. | ||
| 151 | \endlist | ||
| 152 | */ | ||
| 153 | ✗ | PkaPhPiDataParser::PkaPhPiDataParser(const PolChemDefCstSPtr &pol_chem_def_csp, | |
| 154 | ✗ | const QString &file_path) | |
| 155 | ✗ | : mcsp_polChemDef(pol_chem_def_csp), m_filePath(file_path) | |
| 156 | { | ||
| 157 | ✗ | if(mcsp_polChemDef == nullptr || mcsp_polChemDef.get() == nullptr) | |
| 158 | ✗ | qFatalStream() << "Programming error. Cannot be that pointer is nullptr."; | |
| 159 | ✗ | } | |
| 160 | |||
| 161 | /*! | ||
| 162 | \brief Destructs this PkaPhPiDataParser instance | ||
| 163 | */ | ||
| 164 | ✗ | PkaPhPiDataParser::~PkaPhPiDataParser() | |
| 165 | { | ||
| 166 | ✗ | } | |
| 167 | |||
| 168 | /*! | ||
| 169 | \brief Sets the \a file_path. | ||
| 170 | */ | ||
| 171 | void | ||
| 172 | ✗ | PkaPhPiDataParser::setFilePath(const QString &file_path) | |
| 173 | { | ||
| 174 | ✗ | m_filePath = file_path; | |
| 175 | ✗ | } | |
| 176 | |||
| 177 | /*! | ||
| 178 | \brief Returns the file path. | ||
| 179 | */ | ||
| 180 | const QString & | ||
| 181 | ✗ | PkaPhPiDataParser::filePath() | |
| 182 | { | ||
| 183 | ✗ | return m_filePath; | |
| 184 | } | ||
| 185 | |||
| 186 | /*! | ||
| 187 | \brief Parses the file and fills-in the \a monomers and the \a modifs containers. | ||
| 188 | |||
| 189 | The two container hold specific Monomer and Modif instances as desribed in the loaded file. | ||
| 190 | |||
| 191 | Returns true upon success, false otherwise. | ||
| 192 | */ | ||
| 193 | bool | ||
| 194 | ✗ | PkaPhPiDataParser::renderXmlFile(std::vector<MonomerSPtr> &monomers, | |
| 195 | std::vector<ModifSPtr> &modifs) | ||
| 196 | { | ||
| 197 | // <pkaphpidata> | ||
| 198 | // <monomers> | ||
| 199 | // <monomer> | ||
| 200 | // <code>A</code> | ||
| 201 | // <mnmchemgroup> | ||
| 202 | // <name>N-term NH2</name> | ||
| 203 | // <pka>9.6</pka> | ||
| 204 | // <acidcharged>TRUE</acidcharged> | ||
| 205 | // <polrule>left_trapped</polrule> | ||
| 206 | // <chemgrouprule> | ||
| 207 | // <entity>LE_PLM_MODIF</entity> | ||
| 208 | // <name>Acetylation</name> | ||
| 209 | // <outcome>LOST</outcome> | ||
| 210 | // </chemgrouprule> | ||
| 211 | // </mnmchemgroup> | ||
| 212 | // <mnmchemgroup> | ||
| 213 | // <name>C-term COOH</name> | ||
| 214 | // <pka>2.35</pka> | ||
| 215 | // <acidcharged>FALSE</acidcharged> | ||
| 216 | // <polrule>right_trapped</polrule> | ||
| 217 | // </mnmchemgroup> | ||
| 218 | // </monomer> | ||
| 219 | // [...] | ||
| 220 | // <monomer> | ||
| 221 | // <code>C</code> | ||
| 222 | // <mnmchemgroup> | ||
| 223 | // <name>N-term NH2</name> | ||
| 224 | // <pka>9.6</pka> | ||
| 225 | // <acidcharged>TRUE</acidcharged> | ||
| 226 | // <polrule>left_trapped</polrule> | ||
| 227 | // <chemgrouprule> | ||
| 228 | // <entity>LE_PLM_MODIF</entity> | ||
| 229 | // <name>Acetylation</name> | ||
| 230 | // <outcome>LOST</outcome> | ||
| 231 | // </chemgrouprule> | ||
| 232 | // </mnmchemgroup> | ||
| 233 | // <mnmchemgroup> | ||
| 234 | // <name>C-term COOH</name> | ||
| 235 | // <pka>2.35</pka> | ||
| 236 | // <acidcharged>FALSE</acidcharged> | ||
| 237 | // <polrule>right_trapped</polrule> | ||
| 238 | // </mnmchemgroup> | ||
| 239 | // <mnmchemgroup> | ||
| 240 | // <name>Lateral SH2</name> | ||
| 241 | // <pka>8.3</pka> | ||
| 242 | // <acidcharged>FALSE</acidcharged> | ||
| 243 | // <polrule>never_trapped</polrule> | ||
| 244 | // </mnmchemgroup> | ||
| 245 | // </monomer> | ||
| 246 | // </monomers> | ||
| 247 | // <modifs> | ||
| 248 | // <modif> | ||
| 249 | // <name>Phosphorylation</name> | ||
| 250 | // <mdfchemgroup> | ||
| 251 | // <name>none_set</name> | ||
| 252 | // <pka>1.2</pka> | ||
| 253 | // <acidcharged>FALSE</acidcharged> | ||
| 254 | // </mdfchemgroup> | ||
| 255 | // <mdfchemgroup> | ||
| 256 | // <name>none_set</name> | ||
| 257 | // <pka>6.5</pka> | ||
| 258 | // <acidcharged>FALSE</acidcharged> | ||
| 259 | // </mdfchemgroup> | ||
| 260 | // </modif> | ||
| 261 | // </modifs> | ||
| 262 | // </pkaphpidata> | ||
| 263 | // | ||
| 264 | // The DTD stipulates that: | ||
| 265 | // | ||
| 266 | // <!ELEMENT pkaphpidata(monomers,modifs*)> | ||
| 267 | // <!ELEMENT monomers(monomer*)> | ||
| 268 | // <!ELEMENT modifs(modif*)> | ||
| 269 | // <!ELEMENT monomer(code,mnmchemgroup*)> | ||
| 270 | // <!ELEMENT modif(name,mdfchemgroup*)> | ||
| 271 | |||
| 272 | ✗ | QDomDocument doc("pkaPhPiData"); | |
| 273 | ✗ | QDomElement element; | |
| 274 | ✗ | QDomElement child; | |
| 275 | ✗ | QDomElement indentedChild; | |
| 276 | |||
| 277 | ✗ | QFile file(m_filePath); | |
| 278 | |||
| 279 | ✗ | if(!file.open(QIODevice::ReadOnly)) | |
| 280 | return false; | ||
| 281 | |||
| 282 | ✗ | if(!doc.setContent(&file)) | |
| 283 | { | ||
| 284 | ✗ | file.close(); | |
| 285 | return false; | ||
| 286 | } | ||
| 287 | |||
| 288 | ✗ | file.close(); | |
| 289 | |||
| 290 | ✗ | element = doc.documentElement(); | |
| 291 | |||
| 292 | ✗ | if(element.tagName() != "pkaphpidata") | |
| 293 | { | ||
| 294 | qDebug() << __FILE__ << __LINE__ << "pKa-pH-pI data file is erroneous\n"; | ||
| 295 | return false; | ||
| 296 | } | ||
| 297 | |||
| 298 | // The first child element must be <monomers>. | ||
| 299 | |||
| 300 | ✗ | child = element.firstChildElement(); | |
| 301 | ✗ | if(child.tagName() != "monomers") | |
| 302 | { | ||
| 303 | ✗ | qCritical() << "The PkaPhPi data file could not be parsed: the " | |
| 304 | ✗ | "<monomers> element was not found."; | |
| 305 | ✗ | return false; | |
| 306 | } | ||
| 307 | |||
| 308 | // Parse the <monomer> elements. | ||
| 309 | |||
| 310 | ✗ | indentedChild = child.firstChildElement(); | |
| 311 | ✗ | while(!indentedChild.isNull()) | |
| 312 | { | ||
| 313 | ✗ | if(indentedChild.tagName() != "monomer") | |
| 314 | ✗ | return false; | |
| 315 | |||
| 316 | ✗ | QDomElement superIndentedElement = indentedChild.firstChildElement(); | |
| 317 | |||
| 318 | ✗ | if(superIndentedElement.tagName() != "code") | |
| 319 | { | ||
| 320 | ✗ | qCritical() << "The PkaPhPi data file could not be parsed: the " | |
| 321 | ✗ | "<code> element was not found."; | |
| 322 | ✗ | return false; | |
| 323 | } | ||
| 324 | |||
| 325 | ✗ | MonomerSPtr monomer_sp = | |
| 326 | ✗ | std::make_shared<Monomer>(mcsp_polChemDef, | |
| 327 | /*name*/ "", | ||
| 328 | ✗ | superIndentedElement.text(), | |
| 329 | /*formula string*/ "", | ||
| 330 | ✗ | 0.0, | |
| 331 | ✗ | 0.0); | |
| 332 | |||
| 333 | // All the <mnmchemgroup> elements, if any. | ||
| 334 | |||
| 335 | ✗ | superIndentedElement = superIndentedElement.nextSiblingElement(); | |
| 336 | |||
| 337 | ✗ | while(!superIndentedElement.isNull()) | |
| 338 | { | ||
| 339 | ✗ | if(superIndentedElement.tagName() != "mnmchemgroup") | |
| 340 | { | ||
| 341 | ✗ | qCritical() << "The PkaPhPi data file could not be parsed: the " | |
| 342 | ✗ | "<mnmchemgroup> element was not found."; | |
| 343 | |||
| 344 | ✗ | monomer_sp.reset(); | |
| 345 | ✗ | return false; | |
| 346 | } | ||
| 347 | |||
| 348 | ✗ | ChemicalGroup *chemGroup = new ChemicalGroup("NOT_SET"); | |
| 349 | |||
| 350 | ✗ | if(!chemGroup->renderXmlMnmElement(superIndentedElement)) | |
| 351 | { | ||
| 352 | ✗ | qCritical() << "The PkaPhPi data file could not be parsed: the " | |
| 353 | ✗ | "<mnmchemgroup> element failed to render."; | |
| 354 | |||
| 355 | ✗ | monomer_sp.reset(); | |
| 356 | ✗ | delete chemGroup; | |
| 357 | ✗ | return false; | |
| 358 | } | ||
| 359 | |||
| 360 | ✗ | ChemicalGroupProp *prop = | |
| 361 | ✗ | new ChemicalGroupProp("CHEMICAL_GROUP", chemGroup); | |
| 362 | |||
| 363 | ✗ | monomer_sp->appendProp(prop); | |
| 364 | |||
| 365 | ✗ | superIndentedElement = superIndentedElement.nextSiblingElement(); | |
| 366 | } | ||
| 367 | |||
| 368 | ✗ | monomers.push_back(monomer_sp); | |
| 369 | |||
| 370 | ✗ | indentedChild = indentedChild.nextSiblingElement(); | |
| 371 | ✗ | } | |
| 372 | |||
| 373 | #if 0 | ||
| 374 | |||
| 375 | qDebug() << __FILE__ << __LINE__ | ||
| 376 | << "Debug output of all the monomers parsed:"; | ||
| 377 | |||
| 378 | for (int iter = 0; iter < monomerList->size(); ++iter) | ||
| 379 | { | ||
| 380 | Monomer *monomer = monomerList->at(iter); | ||
| 381 | qDebug() << __FILE__ << __LINE__ | ||
| 382 | << "Monomer:" << monomer->name(); | ||
| 383 | |||
| 384 | for(int jter = 0; jter < monomer->propList()->size(); ++jter) | ||
| 385 | { | ||
| 386 | Prop *prop = monomer->propList()->at(jter); | ||
| 387 | |||
| 388 | if (prop->name() == "CHEMICAL_GROUP") | ||
| 389 | { | ||
| 390 | const ChemicalGroup *chemGroup = | ||
| 391 | static_cast<const ChemicalGroup *>(prop->data()); | ||
| 392 | |||
| 393 | qDebug() << __FILE__ << __LINE__ | ||
| 394 | << "Chemical group:" | ||
| 395 | << chemGroup->name() << chemGroup->pka(); | ||
| 396 | } | ||
| 397 | } | ||
| 398 | } | ||
| 399 | |||
| 400 | #endif | ||
| 401 | |||
| 402 | // And now parse the <modifs> elements, if any, this time, as | ||
| 403 | // this element is not compulsory. | ||
| 404 | |||
| 405 | ✗ | child = child.nextSiblingElement(); | |
| 406 | ✗ | if(child.isNull()) | |
| 407 | return true; | ||
| 408 | |||
| 409 | ✗ | if(child.tagName() != "modifs") | |
| 410 | { | ||
| 411 | ✗ | qCritical() << "The PkaPhPi data file could not be parsed: the " | |
| 412 | ✗ | "<modifs> element was not found."; | |
| 413 | ✗ | return false; | |
| 414 | } | ||
| 415 | |||
| 416 | // Parse the <modif> elements. | ||
| 417 | |||
| 418 | ✗ | indentedChild = child.firstChildElement(); | |
| 419 | ✗ | while(!indentedChild.isNull()) | |
| 420 | { | ||
| 421 | ✗ | if(indentedChild.tagName() != "modif") | |
| 422 | { | ||
| 423 | ✗ | qCritical() << "The PkaPhPi data file could not be parsed: the " | |
| 424 | ✗ | "<modif> element was not found."; | |
| 425 | ✗ | return false; | |
| 426 | } | ||
| 427 | |||
| 428 | ✗ | QDomElement superIndentedElement = indentedChild.firstChildElement(); | |
| 429 | |||
| 430 | ✗ | if(superIndentedElement.tagName() != "name") | |
| 431 | { | ||
| 432 | ✗ | qCritical() << "The PkaPhPi data file could not be parsed: the " | |
| 433 | ✗ | "<name> element was not found."; | |
| 434 | ✗ | return false; | |
| 435 | } | ||
| 436 | |||
| 437 | ✗ | ModifSPtr modif_sp = std::make_shared<Modif>( | |
| 438 | ✗ | mcsp_polChemDef, superIndentedElement.text(), "H0"); | |
| 439 | |||
| 440 | // All the <mdfchemgroup> elements, if any. | ||
| 441 | |||
| 442 | ✗ | superIndentedElement = superIndentedElement.nextSiblingElement(); | |
| 443 | ✗ | while(!superIndentedElement.isNull()) | |
| 444 | { | ||
| 445 | ✗ | if(superIndentedElement.tagName() != "mdfchemgroup") | |
| 446 | { | ||
| 447 | ✗ | qCritical() << "The PkaPhPi data file could not be parsed: the " | |
| 448 | ✗ | "<mdfchemgroup> element was not found."; | |
| 449 | ✗ | modif_sp.reset(); | |
| 450 | ✗ | return false; | |
| 451 | } | ||
| 452 | |||
| 453 | ✗ | ChemicalGroup *chemGroup = new ChemicalGroup("NOT_SET"); | |
| 454 | |||
| 455 | ✗ | if(!chemGroup->renderXmlMdfElement(superIndentedElement)) | |
| 456 | { | ||
| 457 | ✗ | qCritical() << "The PkaPhPi data file could not be parsed: the " | |
| 458 | ✗ | "<mdfchemgroup> element failed to render."; | |
| 459 | ✗ | modif_sp.reset(); | |
| 460 | ✗ | delete chemGroup; | |
| 461 | ✗ | return false; | |
| 462 | } | ||
| 463 | |||
| 464 | ✗ | ChemicalGroupProp *prop = | |
| 465 | ✗ | new ChemicalGroupProp("CHEMICAL_GROUP", chemGroup); | |
| 466 | |||
| 467 | ✗ | modif_sp->appendProp(prop); | |
| 468 | |||
| 469 | ✗ | superIndentedElement = superIndentedElement.nextSiblingElement(); | |
| 470 | } | ||
| 471 | |||
| 472 | ✗ | modifs.push_back(modif_sp); | |
| 473 | |||
| 474 | ✗ | indentedChild = indentedChild.nextSiblingElement(); | |
| 475 | ✗ | } | |
| 476 | |||
| 477 | #if 0 | ||
| 478 | |||
| 479 | qDebug() << __FILE__ << __LINE__ | ||
| 480 | << "Debug output of all the modifs parsed:"; | ||
| 481 | |||
| 482 | for (int iter = 0; iter < modifList->size(); ++iter) | ||
| 483 | { | ||
| 484 | Modif *modif = modifList->at(iter); | ||
| 485 | |||
| 486 | // qDebug() << __FILE__ << __LINE__ | ||
| 487 | // << "Modif:" << modif->name(); | ||
| 488 | |||
| 489 | for(int jter = 0; jter < modif->propList()->size(); ++jter) | ||
| 490 | { | ||
| 491 | Prop *prop = modif->propList()->at(jter); | ||
| 492 | |||
| 493 | if (prop->name() == "CHEMICAL_GROUP") | ||
| 494 | { | ||
| 495 | const ChemicalGroup *chemGroup = | ||
| 496 | static_cast<const ChemicalGroup *>(prop->data()); | ||
| 497 | |||
| 498 | qDebug() << __FILE__ << __LINE__ | ||
| 499 | << "Chemical group:" | ||
| 500 | << chemGroup->name() << chemGroup->pka(); | ||
| 501 | } | ||
| 502 | } | ||
| 503 | } | ||
| 504 | |||
| 505 | #endif | ||
| 506 | |||
| 507 | return true; | ||
| 508 | ✗ | } | |
| 509 | |||
| 510 | } // namespace libXpertMassCore | ||
| 511 | |||
| 512 | } // namespace MsXpS | ||
| 513 |