libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
msfileaccessor.cpp
Go to the documentation of this file.
1// #include <proteowizard/pwiz/data/msdata/DefaultReaderList.hpp>
2
3#include <QDebug>
4#include <QFile>
5#include <QFileInfo>
6
7
8#include "msfileaccessor.h"
9#include "pwizmsfilereader.h"
10#include "timsmsfilereader.h"
11#include "bafasciifilereader.h"
12#include "xymsfilereader.h"
13
14
15#include "../exception/exceptionnotfound.h"
16#include "../exception/exceptionnotpossible.h"
17#include "../exception/exceptionnotrecognized.h"
18#include "../msrun/msrunid.h"
19#include "../msrun/private/timsframesmsrunreader.h"
20
21#include "../msrun/private/pwizmsrunreader.h"
22#include "../msrun/private/timsmsrunreader.h"
23#include "../msrun/private/timsmsrunreaderms2.h"
24#include "../msrun/private/timsmsrunreaderdia.h"
25#include "../msrun/bafasciimsrunreader.h"
26#include "../msrun/xymsrunreader.h"
27
28#include "../utils.h"
29
30
31namespace pappso
32{
33
34
35MsFileAccessor::MsFileAccessor(const QString &file_name,
36 const QString &xml_prefix)
37 : m_fileName(file_name), m_xmlPrefix(xml_prefix)
38{
39 QFile file(file_name);
40 if(!file.exists())
41 throw(ExceptionNotFound(QObject::tr("File %1 not found.")
42 .arg(QFileInfo(file_name).absoluteFilePath())));
43
44
46 m_oboPsiModTermNativeIDFormat.m_name = "no nativeID format";
48 "No nativeID format indicates that the file tagged with this term does not "
49 "contain spectra that can have a nativeID format.";
50}
51
52
54 : m_fileName(other.m_fileName),
55 m_xmlPrefix(other.m_xmlPrefix),
56 m_fileFormat(other.m_fileFormat),
57 m_fileReaderType(other.m_fileReaderType)
58{
60}
61
65
66
67const QString &
69{
70 return m_fileName;
71}
72
73
79
80const OboPsiModTerm
82{
83 OboPsiModTerm term;
84
85 // is_a: MS:1000560 ! mass spectrometer file format
86 switch(m_fileFormat)
87 {
89 term.m_accession = "MS:1001560";
90 term.m_name = "SCIEX TOF/TOF T2D format";
91 term.m_definition =
92 "Applied Biosystems/MDS Analytical Technologies TOF/TOF instrument "
93 "export format.";
94 break;
96 term.m_accession = "MS:1000562";
97 term.m_name = "ABI WIFF format";
98 term.m_definition = "Applied Biosystems WIFF file format.";
99 break;
101 term.m_accession = "MS:1001509";
102 term.m_name = "Agilent MassHunter format";
103 term.m_definition =
104 "A data file format found in an Agilent MassHunter directory which "
105 "contains raw data acquired by an Agilent mass spectrometer.";
106 break;
108 break;
110 term.m_accession = "MS:1000825";
111 term.m_name = "Bruker FID format";
112 term.m_definition = "Bruker FID file format.";
113 break;
115 term.m_accession = "MS:1002817";
116 term.m_name = "Bruker TDF format";
117 term.m_definition = "Bruker TDF raw file format.";
118 break;
120 term.m_accession = "MS:1000567";
121 term.m_name = "Bruker/Agilent YEP format";
122 term.m_definition = "Bruker/Agilent YEP file format.";
123 break;
125 term.m_accession = "MS:1001062";
126 term.m_name = "Mascot MGF format";
127 term.m_definition = "Mascot MGF file format.";
128 break;
130 break;
132 term.m_accession = "MS:1001881";
133 term.m_name = "mz5 format";
134 term.m_definition = "mz5 file format, modelled after mzML.";
135 break;
137 term.m_accession = "MS:1000584";
138 term.m_name = "mzML format";
139 term.m_definition =
140 "Proteomics Standards Inititative mzML file format.";
141 break;
143 term.m_accession = "MS:1000566";
144 term.m_name = "ISB mzXML format";
145 term.m_definition = "Institute of Systems Biology mzXML file format.";
146 break;
148 break;
150
151 term.m_accession = "MS:1000563";
152 term.m_name = "Thermo RAW format";
153 term.m_definition = "Thermo Scientific RAW file format.";
154 break;
156 break;
158 term.m_accession = "MS:1000526";
159 term.m_name = "Waters raw format";
160 term.m_definition =
161 "Waters data file format found in a Waters RAW directory, generated "
162 "from an MS acquisition.";
163 break;
165 term.m_accession = "MS:1001369";
166 term.m_name = "BafAscii text format";
167 term.m_definition =
168 "Simple text file format obtained by exporting Bruker Baf to ascii "
169 "using Bruker software";
170 break;
171 case MsDataFormat::xy:
172 term.m_accession = "MS:1001369";
173 term.m_name = "text format";
174 term.m_definition =
175 "Simple text file format of \"m/z<separator>intensity\" value pairs "
176 "for a single mass spectrum, a PMF (or single MS2) search.";
177 break;
178 default:
179 break;
180 }
181
182 return term;
183}
184
185
186const OboPsiModTerm &
193
194
195std::vector<MsRunIdCstSPtr>
197{
198 // qDebug();
199
200 // Try the PwizMsFileReader
201
202 PwizMsFileReader pwiz_ms_file_reader(m_fileName);
203
204 std::vector<MsRunIdCstSPtr> ms_run_ids =
205 pwiz_ms_file_reader.getMsRunIds(m_xmlPrefix);
206 if(ms_run_ids.size())
207 {
208 qDebug() << "Might well be handled using the Pwiz code.";
209
210 m_fileFormat = pwiz_ms_file_reader.getFileFormat();
212
213 // But the user might have configured one preferred reader type.
214
216 if(pref != m_preferredFileReaderTypeMap.end())
217 {
218 m_fileReaderType = pref->second;
219 }
220
221 return ms_run_ids;
222 }
223
224 qDebug() << "The Pwiz reader did not work.";
225
226 // Try the TimsData reader
227
228 QString tims_dir = m_fileName;
229 if(!QFileInfo(tims_dir).isDir())
230 {
231 tims_dir = QFileInfo(m_fileName).absolutePath();
232 }
233
234 TimsMsFileReader tims_file_reader(tims_dir);
235
236 ms_run_ids = tims_file_reader.getMsRunIds(m_xmlPrefix);
237
238 if(ms_run_ids.size())
239 {
240 qDebug() << "Might well be handled using the Bruker code";
241
242 m_fileName = tims_dir;
243 m_fileFormat = tims_file_reader.getFileFormat();
245
247 if(pref != m_preferredFileReaderTypeMap.end())
248 {
249 m_fileReaderType = pref->second;
250 }
251
252 qDebug() << "Returning Bruker::tims ms run(s)."
253 << "with preferred reader type:"
255
256 return ms_run_ids;
257 }
258
259 qDebug() << "The Tims reader did not work.";
260
261 // Try the Baf->ascii export format from Bruker Compass
262
263 try
264 {
265 ms_run_ids.clear();
266 BafAsciiFileReader baf_ascii_ms_file_reader(m_fileName);
267
268 ms_run_ids = baf_ascii_ms_file_reader.getMsRunIds(m_xmlPrefix);
269
270 if(ms_run_ids.size())
271 {
272 qDebug() << "Might well be handled using the BafAscii code";
273
275
276 m_fileFormat = baf_ascii_ms_file_reader.getFileFormat();
277
279 {
280 ms_run_ids.clear();
281 }
282 else
283 {
284 return ms_run_ids;
285 }
286 }
287 }
288 catch(const pappso::PappsoException &error)
289 {
290 qDebug() << "This is not a BafAscii code file" << error.qwhat();
291 }
292
293
294 qDebug() << "The BafAscii reader did not work.";
295
296 // At this point try the XyMsFileReader
297
298 XyMsFileReader xy_ms_file_reader(m_fileName);
299
300 ms_run_ids = xy_ms_file_reader.getMsRunIds(m_xmlPrefix);
301
302 if(ms_run_ids.size())
303 {
304 qDebug() << "Might well be handled using the XY code";
305
307
308 m_fileFormat = xy_ms_file_reader.getFileFormat();
309
310 return ms_run_ids;
311 }
312
313 qDebug() << "The XY reader did not work.";
314
315 return ms_run_ids;
316}
317
318
// Records reader_type as the preferred reader for the given data format in
// m_preferredFileReaderTypeMap, overwriting any preference stored earlier
// for that same format.
// NOTE(review): the line carrying the function name and the `format`
// parameter is elided from this listing.
319void
321 FileReaderType reader_type)
322{
323 // qDebug();
324
 // insert() only adds the pair when the key is absent; ret.second reports
 // whether the insertion actually took place.
325 auto ret = m_preferredFileReaderTypeMap.insert(
326 std::pair<MsDataFormat, FileReaderType>(format, reader_type));
327
328 if(!ret.second)
329 {
330 // Key already present: the map was left untouched, so overwrite the
 // stored value through the iterator returned by insert().
331 ret.first->second = reader_type;
332 }
333}
334
335
338{
 // Returns the reader type registered for `format` in
 // m_preferredFileReaderTypeMap; when no preference was registered for that
 // format, returns the current m_fileReaderType instead.
 // NOTE(review): the signature lines are elided from this listing.
339 // qDebug();
340
341 auto ret = m_preferredFileReaderTypeMap.find(format);
342
343 if(ret != m_preferredFileReaderTypeMap.end())
344 {
345 return ret->second;
346 }
347
 // No explicit preference for this format.
348 return m_fileReaderType;
349}
350
351
357
358
359void
361{
362 mcsp_selectedMsRunId = ms_run_id_csp;
363}
364
365
371
374{
375 // try TimsData reader
376 QString tims_dir = m_fileName;
377 if(!QFileInfo(tims_dir).isDir())
378 {
379 tims_dir = QFileInfo(m_fileName).absolutePath();
380 }
381 TimsMsFileReader tims_file_reader(tims_dir);
382
383 std::vector<MsRunIdCstSPtr> ms_run_ids =
384 tims_file_reader.getMsRunIds(m_xmlPrefix);
385
386 if(ms_run_ids.size())
387 {
388 // qDebug() << "Might well be handled using the Bruker code";
390 m_fileFormat = tims_file_reader.getFileFormat();
391 m_fileName = tims_dir;
392
393 return std::make_shared<TimsMsRunReaderMs2>(ms_run_ids.front());
394 }
395 else
396 {
398 QObject::tr("Unable to read mz data directory %1 with TimsTOF reader.")
399 .arg(tims_dir)));
400 }
401}
402
403
406{
407 // qDebug();
408
409 // We want to return a MsRunReader that accounts for the configuration that
410 // the user might have set.
411
412 if(m_fileName != ms_run_id->getFileName())
414 QObject::tr("The MsRunId instance must have the name file name as the "
415 "MsFileAccessor.")));
416
418 {
419 // qDebug() << "Returning a PwizMsRunReader.";
420 auto pwiz_reader = std::make_shared<PwizMsRunReader>(ms_run_id);
422 pwiz_reader->getOboPsiModTermNativeIDFormat();
423 return pwiz_reader;
424 }
426 {
427 // qDebug() << "Returning a XyMsRunReader.";
428
429 return std::make_shared<XyMsRunReader>(ms_run_id);
430 }
432 {
433 // qDebug() << "Returning a TimsMsRunReader.";
434
435 return std::make_shared<TimsMsRunReader>(ms_run_id);
436 }
439 {
440 // qDebug() << "Returning a TimsFramesMsRunReader.";
441
442 return std::make_shared<TimsFramesMsRunReader>(ms_run_id);
443 }
445 {
446 // qDebug() << "Returning a TimsMsRunReaderMs2.";
447
448 return std::make_shared<TimsMsRunReaderMs2>(ms_run_id);
449 }
451 {
452 // qDebug() << "Returning a TimsMsRunReaderMs2.";
453
454 //qInfo() << "std::make_shared<TimsMsRunReaderDia>(ms_run_id);";
455 return std::make_shared<TimsMsRunReaderDia>(ms_run_id);
456 }
458 {
459 // qDebug() << "Returning a BafAsciiMsRunReader.";
460
461 return std::make_shared<BafAsciiMsRunReader>(ms_run_id);
462 }
464 {
465 if(ms_run_id.get()->getMsDataFormat() == MsDataFormat::xy)
466 {
467 return std::make_shared<XyMsRunReader>(ms_run_id);
468 }
469 else
470 {
471 auto pwiz_reader = std::make_shared<PwizMsRunReader>(ms_run_id);
473 pwiz_reader->getOboPsiModTermNativeIDFormat();
474 return pwiz_reader;
475 }
476 }
477 else
478 {
479 throw PappsoException(QObject::tr("No file format was found."));
480 }
481
482 return nullptr;
483}
484
485
487MsFileAccessor::msRunReaderSPtr(std::size_t ms_run_id_index)
488{
 // Index-based convenience overload: fetches the run id list with
 // getMsRunIds() on every call, then builds the reader for the entry at
 // ms_run_id_index via the overload taking an MsRunIdCstSPtr.
 // Throws PappsoException when ms_run_id_index is not a valid index.
489 std::vector<MsRunIdCstSPtr> ms_run_ids = getMsRunIds();
490 if(ms_run_id_index >= ms_run_ids.size())
491 throw PappsoException(QObject::tr("MsRunId request out-of-bound error."));
492
 // The index was validated above, so at() cannot throw here.
493 return msRunReaderSPtr(ms_run_ids.at(ms_run_id_index));
494}
495
496
504
505
511
514 MsRunIdCstSPtr ms_run_id, pappso::FileReaderType preferred_file_reader_type)
515{
 // Builds a run reader from ms_run_id alone, choosing the reader class from
 // the data format stored in the MsRunId:
 //  - xy             -> XyMsRunReader
 //  - brukerBafAscii -> BafAsciiMsRunReader
 //  - unknown        -> PappsoException
 //  - brukerTims     -> chosen from preferred_file_reader_type (see below)
 //  - anything else  -> PwizMsRunReader
 // Throws ExceptionNotFound when the file named by ms_run_id does not exist.
 // NOTE(review): the line carrying the function name is elided from this
 // listing.
516 QFile file(ms_run_id.get()->getFileName());
517 if(!file.exists())
518 throw(ExceptionNotFound(
519 QObject::tr("unable to build a reader : file %1 not found.")
520 .arg(QFileInfo(ms_run_id.get()->getFileName()).absoluteFilePath())));
521
522 MsDataFormat file_format = ms_run_id.get()->getMsDataFormat();
523
524 if(file_format == MsDataFormat::xy)
525 {
526 // qDebug() << "Returning a XyMsRunReader.";
527
528 return std::make_shared<XyMsRunReader>(ms_run_id);
529 }
530 else if(file_format == MsDataFormat::brukerBafAscii)
531 {
532 // qDebug() << "Returning a BafAsciiMsRunReader.";
533
534 return std::make_shared<BafAsciiMsRunReader>(ms_run_id);
535 }
536 else if(file_format == MsDataFormat::unknown)
537 {
538 throw(PappsoException(
539 QObject::tr("unable to build a reader for %1 : unknown file format")
540 .arg(QFileInfo(ms_run_id.get()->getFileName()).absoluteFilePath())));
541 }
542
543 else if(file_format == MsDataFormat::brukerTims)
544 {
 // Only tims, tims_ms2 and tims_frames are dispatched explicitly; every
 // other preferred type ends up with the default TimsMsRunReader below.
545 if(preferred_file_reader_type == pappso::FileReaderType::tims)
546 {
547 return std::make_shared<TimsMsRunReader>(ms_run_id);
548 }
549 else if(preferred_file_reader_type == pappso::FileReaderType::tims_ms2)
550 {
551 return std::make_shared<TimsMsRunReaderMs2>(ms_run_id);
552 }
553 else if(preferred_file_reader_type == pappso::FileReaderType::tims_frames)
554 {
555 qDebug()
556 << "returning std::make_shared<TimsFramesMsRunReader>(ms_run_id).";
557 return std::make_shared<TimsFramesMsRunReader>(ms_run_id);
558 }
559 // qDebug() << "by default, build a TimsMsRunReader.";
560 return std::make_shared<TimsMsRunReader>(ms_run_id);
561 }
562 else
563 {
564 // qDebug() << "Returning a PwizMsRunReader .";
565 return std::make_shared<PwizMsRunReader>(ms_run_id);
566 }
567}
568
569
572 const QString &xml_id)
573{
 // Looks up the run whose getRunId() equals run_id among the runs returned
 // by getMsRunIds(), copies its MsRunId, stamps the copy with xml_id and
 // returns the reader built for that copy by msRunReaderSPtr().
 // When run_id is empty and the file holds exactly one run, that single run
 // is used. Otherwise ExceptionNotFound is thrown.
574 std::vector<MsRunIdCstSPtr> run_list = getMsRunIds();
575 MsRunReaderSPtr reader_sp;
576 for(MsRunIdCstSPtr &original_run_id : run_list)
577 {
578 if(original_run_id.get()->getRunId() == run_id)
579 {
 // Work on a copy so the MsRunId held in run_list keeps its own XML id.
580 MsRunId new_run_id(*original_run_id.get());
581 new_run_id.setXmlId(xml_id);
582
583 return msRunReaderSPtr(std::make_shared<MsRunId>(new_run_id));
584 }
585 }
586
 // No run matched by name: accept an empty run_id when the choice is
 // unambiguous (a single run in the file).
587 if((run_id.isEmpty()) && (run_list.size() == 1))
588 {
589 MsRunId new_run_id(*run_list[0].get());
590 new_run_id.setXmlId(xml_id);
591
592 return msRunReaderSPtr(std::make_shared<MsRunId>(new_run_id));
593 }
594
595
 // NOTE(review): reader_sp is never assigned in this function, so this test
 // always holds and the trailing `return reader_sp;` is unreachable.
596 if(reader_sp == nullptr)
597 {
598 throw(
599 ExceptionNotFound(QObject::tr("run id %1 not found in file %2")
600 .arg(run_id)
601 .arg(QFileInfo(m_fileName).absoluteFilePath())));
602 }
603 return reader_sp;
604}
605
606
607} // namespace pappso
virtual MsDataFormat getFileFormat() override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
const OboPsiModTerm & getOboPsiModTermNativeIDFormat() const
get OboPsiModTerm corresponding to the nativeID format of mz data
std::map< MsDataFormat, FileReaderType > m_preferredFileReaderTypeMap
MsRunIdCstSPtr getSelectedMsRunId() const
MsRunReaderSPtr msRunReaderSPtr(MsRunIdCstSPtr ms_run_id)
void setPreferredFileReaderType(MsDataFormat format, FileReaderType reader_type)
given an mz format, explicitly set the preferred reader
FileReaderType getpreferredFileReaderType(MsDataFormat format)
MsRunReaderSPtr msRunReaderSPtrForSelectedMsRunId()
FileReaderType getFileReaderType() const
get the file reader type
MsRunIdCstSPtr mcsp_selectedMsRunId
MsDataFormat getFileFormat() const
get the raw format of mz data
std::vector< MsRunIdCstSPtr > getMsRunIds()
OboPsiModTerm m_oboPsiModTermNativeIDFormat
void setSelectedMsRunId(MsRunIdCstSPtr ms_run_id_csp)
const OboPsiModTerm getOboPsiModTermFileFormat() const
get OboPsiModTerm corresponding to the raw format of mz data
FileReaderType m_fileReaderType
MsRunReaderSPtr getMsRunReaderSPtrByRunId(const QString &run_id, const QString &xml_id)
get an msrun reader by finding the run_id in file
MsFileAccessor(const QString &file_name, const QString &xml_prefix)
static MsRunReaderSPtr buildMsRunReaderSPtr(MsRunIdCstSPtr ms_run_id)
get an MsRunReader directly from a valid MsRun ID
TimsMsRunReaderMs2SPtr buildTimsMsRunReaderMs2SPtr()
if possible, builds directly a dedicated Tims TOF tdf file reader
const QString & getFileName() const
MS run identity MsRunId identifies an MS run with a unique ID (XmlId) and contains eventually informa...
Definition msrunid.h:54
void setXmlId(const QString &xml_id)
set an XML unique identifier for this MsRunId
Definition msrunid.cpp:137
virtual const QString & qwhat() const
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual MsDataFormat getFileFormat() override
virtual MsDataFormat getFileFormat() override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
static QString fileReaderTypeAsString(FileReaderType file_reader_type)
Definition utils.cpp:519
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual MsDataFormat getFileFormat() override
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< MsRunReader > MsRunReaderSPtr
Definition msrunreader.h:56
MsDataFormat
Definition types.h:120
@ xy
(x,y) format
@ unknown
unknown format
@ SQLite3
SQLite3 format.
@ MGF
Mascot format.
std::shared_ptr< TimsMsRunReaderMs2 > TimsMsRunReaderMs2SPtr
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
FileReaderType
Definition types.h:147
@ pwiz
using libpwizlite
@ tims
TimsMsRunReader : each scan is returned as a mass spectrum.
MSrun file reader for native Bruker TimsTOF raw data.