GCC Code Coverage Report

Line	Exec	Source
1		/* BEGIN software license
2		*
3		* MsXpertSuite - mass spectrometry software suite
4		* -----------------------------------------------
5		* Copyright(C) 2009,...,2018 Filippo Rusconi
6		*
7		* http://www.msxpertsuite.org
8		*
9		* This file is part of the MsXpertSuite project.
10		*
11		* The MsXpertSuite project is the successor of the massXpert project. This
12		* project now includes various independent modules:
13		*
14		* - massXpert, model polymer chemistries and simulate mass spectrometric data;
15		* - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16		*
17		* This program is free software: you can redistribute it and/or modify
18		* it under the terms of the GNU General Public License as published by
19		* the Free Software Foundation, either version 3 of the License, or
20		* (at your option) any later version.
21		*
22		* This program is distributed in the hope that it will be useful,
23		* but WITHOUT ANY WARRANTY; without even the implied warranty of
24		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25		* GNU General Public License for more details.
26		*
27		* You should have received a copy of the GNU General Public License
28		* along with this program. If not, see <http://www.gnu.org/licenses/>.
29		*
30		* END software license
31		*/
32
33
34		/////////////////////// Qt includes
35		#include <QObject>
36		#include <QFile>
37
38
39		/////////////////////// Local includes
40		#include "MsXpS/libXpertMassCore/MonomerDictionary.hpp"
41		#include "MsXpS/libXpertMassCore/Sequence.hpp"
42
43		namespace MsXpS
44		{
45		namespace libXpertMassCore
46		{
47
48		/*!
49		\class MsXpS::libXpertMassCore::MonomerDictionary
50		\inmodule libXpertMassCore
51		\ingroup XpertMassCoreUtilities
52		\inheaderfile MonomerDictionary.hpp
53
54		\brief The MonomerDictionary class provides a Monomer code dictionary allowing
55		the user to automatically translate Monomer codes from x-letter codes to
56		y-letter codes. For example, a monomer dictionary file can define how to
57		translate 3-letter monomer codes to 1-letter codes. This is typically useful
58		when working with Protein Data Bank (PDB) files.
59
60		The format of the dictionary file is the following:
61
62		\code
63		# Converts from code on the left of '>' to code on the right.
64		# Number of letters allowed in each code is
65		# described with syntax 3>1 and that line should be the first
66		# non-comment line in this file.
67		3>1
68		ALA>A
69		CYS>C
70		ASP>D
71		\endcode
72
73		There might be more than one \e{section} in the file, with, for example, 3>1
74		translations and then 1>3 translations.
75		*/
76
77		/*!
78		\variable MsXpS::libXpertMassCore::MonomerDictionary::m_filePath
79
80		\brief Path to the file documenting the translations.
81		*/
82
83		/*!
84		\variable MsXpS::libXpertMassCore::MonomerDictionary::m_dictionaryHash
85
86		\brief The hash that documents the translations.
87		*/
88
89		/*!
90		\variable MsXpS::libXpertMassCore::MonomerDictionary::m_dictionaryLoaded
91
92		\brief Indicates if the dictionary file has been loaded already.
93		*/
94
95		/*!
96		\variable MsXpS::libXpertMassCore::MonomerDictionary::m_inputChainStringList
97
98		\brief The list of sequences to be converted.
99		*/
100
101		/*!
102		\variable MsXpS::libXpertMassCore::MonomerDictionary::m_inputCodeLength
103
104		\brief The count of letters in the input Monomer code.
105
106		In a dictionary file that has a section
107
108		\code
109		3>1
110		\endcode
111
112		this value would be \e 3.
113		*/
114
115		/*!
116		\variable MsXpS::libXpertMassCore::MonomerDictionary::m_outputCodeLength
117
118		\brief The count of letters in the output Monomer code.
119
120		In a dictionary file that has a section
121
122		\code
123		3>1
124		\endcode
125
126		this value would be \e 1.
127		*/
128
129		/*!
130		\brief Constructs a MonomerDictionary instance.
131
132		\list
133		\li \a file_path: the path to the file containing the Monomer dictionary.
134		\li \a input_chain_string_list: the list of sequences to be converted.
135		\li \a input_code_length: the count of letters in the Monomer codes in the input string.
136		\li \a output_code_length: the count of letters in the Monomer codes in the output string.
137		\endlist
138		*/
139	✗	MonomerDictionary::MonomerDictionary(QString file_path,
140		const QStringList &input_chain_string_list,
141		int input_code_length,
142	✗	int output_code_length)
143	✗	: m_filePath(file_path),
144	✗	m_inputChainStringList(input_chain_string_list),
145	✗	m_inputCodeLength(input_code_length),
146	✗	m_outputCodeLength(output_code_length)
147		{
148	✗	}
149
150		/*!
151		\brief Destructs this MonomerDictionary instance.
152		*/
153	✗	MonomerDictionary::~MonomerDictionary()
154		{
155	✗	}
156
157		/*!
158		\brief Sets the \a file_path to the Monomer dictionary file.
159		*/
160		void
161	✗	MonomerDictionary::setFilePath(QString &file_path)
162		{
163	✗	m_filePath = file_path;
164	✗	}
165
166		/*!
167		\brief Sets the list of input sequences to \a input_chain_string_list.
168		*/
169		void
170	✗	MonomerDictionary::setInputChainStringList(
171		const QStringList &input_chain_string_list)
172		{
173	✗	m_inputChainStringList = input_chain_string_list;
174	✗	}
175
176		/*!
177		\brief Set the count of letters in the input Monomer codes to \a code_length.
178		*/
179		void
180	✗	MonomerDictionary::setInputCodeLength(int code_length)
181		{
182	✗	m_inputCodeLength = code_length;
183	✗	}
184
185		/*!
186		\brief Set the count of letters in the output Monomer codes to \a code_length.
187		*/
188		void
189	✗	MonomerDictionary::setOutputCodeLength(int code_length)
190		{
191	✗	m_outputCodeLength = code_length;
192	✗	}
193
194		/*!
195		\brief Return true if the \a line parsed is in the form X>Y, that is, that it specifies the kind of Monomer code translation.
196		*/
197		bool
198	✗	MonomerDictionary::isLineProperSectionDivider(const QString &line)
199		{
200		// Section dividers in the monomer dictionary file format are
201		// lines containing the following syntax: X>Y, that is for example
202		// 3>1. This means that the following translation rules (like
203		// ILE>I) should convert 3-letter codes into 1-letter codes.
204
205		// However, this line should only be considered proper if X is
206		// actually the value of m_inputCodeLength and Y the value of
207		// m_outputCodeLength.
208
209		// qDebug() << __FILE__ << __LINE__
210		// << "Checking if line is proper section divider :" << line;
211
212	✗	if(line.contains(QRegularExpression("[0-9]+>[0-9]+")))
213		{
214		// We are opening a new section, get the input/output code
215		// lengths and if they math what we expect, then set the
216		// current stream position and call the section parser.
217
218	✗	int greaterThanIndex = line.indexOf('>');
219
220	✗	QString codeLengthString = line.left(greaterThanIndex);
221
222		// qDebug() << __FILE__ << __LINE__
223		// << "Left codeLengthString:" << codeLengthString
224		// << "m_inputCodeLength:" << m_inputCodeLength;
225
226	✗	bool ok = false;
227	✗	int codeLength = codeLengthString.toInt(&ok, 10);
228
229	✗	if(!codeLength && !ok)
230		{
231		qDebug() << __FILE__ << __LINE__ << "Monomer dictionary"
232		<< "Failed to parse file " << m_filePath << "at line "
233		<< line;
234
235		return false;
236		}
237
238	✗	if(codeLength != m_inputCodeLength)
239		{
240		return false;
241		}
242
243	✗	codeLengthString = line.mid(greaterThanIndex + 1, -1);
244
245		// qDebug() << __FILE__ << __LINE__
246		// << "Right codeLengthString:" << codeLengthString
247		// << "m_outputCodeLength:" << m_outputCodeLength;
248
249	✗	ok = false;
250	✗	codeLength = codeLengthString.toInt(&ok, 10);
251
252	✗	if(!codeLength && !ok)
253		{
254		qDebug() << __FILE__ << __LINE__ << "Monomer dictionary"
255		<< "Failed to parse file " << m_filePath << "at line "
256		<< line;
257
258		return false;
259		}
260
261	✗	if(codeLength != m_outputCodeLength)
262		{
263		return false;
264		}
265
266		// At this point, it seems we are in the proper
267		// section.
268
269		return true;
270	✗	}
271
272		// If we are here, that means that the section is not for us.
273
274		// qDebug() << __FILE__ << __LINE__
275		// << "Line is no proper section divider.";
276
277		return false;
278		}
279
280		void
281	✗	MonomerDictionary::skipSection(QTextStream *stream)
282		{
283		// We have entered a section, all we have to do is go through it
284		// and return when we have found either the end of the stream or
285		// the {END} marker.
286
287	✗	qint64 lineLength = 1024;
288	✗	QString line;
289
290	✗	while(!stream->atEnd())
291		{
292	✗	line = stream->readLine(lineLength);
293
294	✗	if(!line.contains("{END}"))
295		{
296	✗	line = stream->readLine(lineLength);
297		}
298		else
299	✗	return;
300		}
301	✗	}
302
303		/*!
304		\brief Parses the Monomer dictionary file section in \a stream and fills in the \l m_dictionaryHash with the translation pair.
305		*/
306		int
307	✗	MonomerDictionary::parseSection(QTextStream *stream)
308		{
309	✗	Q_ASSERT(stream);
310
311	✗	qint64 lineLength = 1024;
312	✗	QString line;
313
314		// Iterate in the file using the stream and for each line create
315		// an item to insert into the dictionary hash.
316
317	✗	while(!stream->atEnd())
318		{
319	✗	line = stream->readLine(lineLength);
320
321		// We might encounter the end of the section, that is a line
322		// having {END} as its sole content.
323
324	✗	if(line.contains("{END}"))
325		break;
326
327	✗	QStringList stringList = line.split('>');
328
329	✗	QString inputCode = stringList.first();
330	✗	QString outputCode = stringList.last();
331
332		// Check that the monomer codes have the proper length.
333
334	✗	if(inputCode.length() != m_inputCodeLength \|\|
335	✗	outputCode.length() != m_outputCodeLength)
336		{
337	✗	qDebug() << __FILE__ << __LINE__ << QObject::tr("Monomer dictionary:")
338		<< QObject::tr("Failed to load dictionary.")
339		<< QObject::tr("Monomer code lengths do not match:")
340		<< QObject::tr("inputCode:") << inputCode
341		<< QObject::tr("outputCode:") << outputCode;
342
343
344		// We have to empty the hash
345	✗	m_dictionaryHash.clear();
346
347	✗	break;
348		}
349
350	✗	m_dictionaryHash.insert(inputCode, outputCode);
351
352		// qDebug() << __FILE__ << __LINE__
353		// << stringList.first () << stringList.last ();
354	✗	}
355
356		// At this point the parsing is finished, either because we
357		// encountered the {END} section-ending delimiter, or because we
358		// reached the en of file.
359
360	✗	int hashSize = m_dictionaryHash.size();
361
362	✗	if(hashSize)
363	✗	m_dictionaryLoaded = true;
364		else
365		{
366	✗	qDebug() << __FILE__ << __LINE__ << QObject::tr("Monomer dictionary:")
367		<< QObject::tr("Failed to load dictionary.");
368
369	✗	m_dictionaryLoaded = false;
370		}
371
372	✗	return hashSize;
373	✗	}
374
375		/*!
376		\brief Returns true if the Monomer dictionary file could be loaded successfully, false otherwise.
377		*/
378		bool
379	✗	MonomerDictionary::loadDictionary()
380		{
381		// Load the file and for each line deconstruct the item into two
382		// QString objects that are used to make a QHash entry in
383		// QHash<QString, QString> m_dictionaryHash.
384	✗	bool success = true;
385	✗	qint64 lineLength = 1024;
386	✗	QString line;
387
388	✗	QFile file(m_filePath);
389
390	✗	if(!file.open(QIODevice::ReadOnly))
391		{
392
393	✗	m_dictionaryLoaded = false;
394
395	✗	qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:"
396		<< "Failed to open file" << m_filePath << "for writing.";
397
398	✗	return false;
399		}
400
401	✗	if(m_inputCodeLength < 1 \|\| m_outputCodeLength < 1)
402		{
403		qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:"
404		<< "Failed to parse file " << m_filePath
405		<< "Please, set the m_inputCodeLength and "
406		"m_ouputCodeLength variables first.";
407
408		return false;
409		}
410
411	✗	QTextStream *stream = new QTextStream(&file);
412	✗	stream->setEncoding(QStringConverter::Utf8);
413
414	✗	while(!stream->atEnd())
415		{
416	✗	line = stream->readLine(lineLength);
417
418		// qDebug() << __FILE__ << __LINE__
419		// << "line: " << line;
420
421		// Remove spaces from start and end of line.
422	✗	line = line.simplified();
423
424	✗	if(line.startsWith('#') \|\| line.isEmpty())
425		{
426	✗	line = stream->readLine(lineLength);
427	✗	continue;
428		}
429
430		// There might be any number of sections in the file, all
431		// delimited with a X>Y directive, indicating how many
432		// characters are allowed for the input code and for the
433		// output code.
434
435	✗	if(!isLineProperSectionDivider(line))
436		{
437		// qDebug() << __FILE__ << __LINE__
438		// << "skipping line:" << line;
439
440	✗	line = stream->readLine(lineLength);
441	✗	continue;
442		}
443		else
444		{
445		// qDebug() << __FILE__ << __LINE__
446		// << "parsing section: " << line;
447
448	✗	if(parseSection(stream) < 1)
449		{
450		qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:"
451		<< "Failed to parse file " << m_filePath;
452
453		success = false;
454		break;
455		}
456		else
457		{
458		// We successfully parsed the section. Our work is done.
459
460	✗	success = true;
461		break;
462		}
463		}
464		}
465
466	✗	delete stream;
467
468	✗	return success;
469	✗	}
470
471		/*!
472		\brief Perform the actual translation from the input Monomer code form to the output Monomer code form on all the strings contained in \a chain_string_list.
473		*/
474		QStringList *
475	✗	MonomerDictionary::translate(const QStringList &chain_string_list)
476		{
477		// The string in sequence is a space-separated list of monomer
478		// codes in the original monomer code format. We have to translate
479		// that to the proper monomer code format using the hash in this
480		// dictionary.
481
482	✗	QStringList *outputChainStringList = new QStringList();
483
484	✗	if(!chain_string_list.isEmpty())
485	✗	m_inputChainStringList = chain_string_list;
486
487		// If there is nothing to do return an empty string list so that
488		// caller knows nothing is actually wrong, only there is no
489		// sequence to translate.
490	✗	if(m_inputChainStringList.isEmpty())
491		return outputChainStringList;
492
493		// Iterate in each chain string of the list and perform the
494		// translation.
495
496	✗	for(int iter = 0; iter < m_inputChainStringList.size(); ++iter)
497		{
498	✗	QString iterString = chain_string_list.at(iter);
499
500		// qDebug() << __FILE__ << __LINE__
501		// << "translating sequence:" << iterString;
502
503	✗	QStringList codeList =
504	✗	iterString.split(QRegularExpression("\\s+"), Qt::SkipEmptyParts);
505
506		// qDebug() << __FILE__ << __LINE__
507		// << "codeList:" << codeList;
508
509		// qDebug() << __FILE__ << __LINE__
510		// << "hash:"
511		// << m_dictionaryHash;
512
513	✗	for(int jter = 0; jter < codeList.size(); ++jter)
514		{
515	✗	QString code = codeList.at(jter);
516
517	✗	QHash<QString, QString>::const_iterator hashIter =
518	✗	m_dictionaryHash.find(code);
519
520	✗	if(hashIter != m_dictionaryHash.end())
521	✗	codeList.replace(jter, hashIter.value());
522		else
523		{
524		// Delete the string list, set the pointer to 0 and
525		// return that pointer so that caller knows something
526		// has gone wrong.
527
528	✗	qDebug() << __FILE__ << __LINE__ << "Monomer dictionary:"
529		<< "Failed to convert monomer code " << code;
530
531	✗	outputChainStringList->clear();
532
533	✗	delete outputChainStringList;
534	✗	outputChainStringList = nullptr;
535
536	✗	return outputChainStringList;
537		}
538	✗	}
539
540		// At this point the sequence codes have been translated. Join all
541		// the item of the codeList into one single string.
542
543	✗	outputChainStringList->append(codeList.join(QString("")));
544	✗	}
545
546		// End of
547		// for (int iter = 0; iter < chainStringList.size(); ++iter)
548
549		// If no translation could be performed, return a n
550
551	✗	if(!outputChainStringList->size())
552		{
553	✗	outputChainStringList->clear();
554
555	✗	delete outputChainStringList;
556	✗	outputChainStringList = 0;
557		}
558
559		return outputChainStringList;
560		}
561
562
563		} // namespace libXpertMassCore
564		} // namespace MsXpS
565