OpenMS
MascotGenericFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Andreas Bertsch, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
42 #include <OpenMS/SYSTEM/File.h>
44 
45 #include <vector>
46 #include <fstream>
47 
48 #ifdef _OPENMP
49 #include <omp.h>
50 #endif
51 
52 namespace OpenMS
53 {
63  class OPENMS_DLLAPI MascotGenericFile :
64  public ProgressLogger,
65  public DefaultParamHandler
66  {
67 public:
68 
71 
73  ~MascotGenericFile() override;
74 
76  void updateMembers_() override;
77 
79  void store(const String& filename, const PeakMap& experiment,
80  bool compact = false);
81 
83  void store(std::ostream& os, const String& filename,
84  const PeakMap& experiment, bool compact = false);
85 
93  template <typename MapType>
94  void load(const String& filename, MapType& exp)
95  {
96  if (!File::exists(filename))
97  {
98  throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, filename);
99  }
100 
101  exp.reset();
102 
103  std::ifstream is(filename.c_str());
104  // get size of file
105  is.seekg(0, std::ios::end);
106  startProgress(0, is.tellg(), "loading MGF");
107  is.seekg(0, std::ios::beg);
108 
109  UInt spectrum_number(0);
110  Size line_number(0); // carry line number for error messages within getNextSpectrum()
111 
112  typename MapType::SpectrumType spectrum;
113  spectrum.setMSLevel(2);
114  spectrum.getPrecursors().resize(1);
115  spectrum.setType(SpectrumSettings::SpectrumType::CENTROID); // MGF is always centroided, by definition
116  while (getNextSpectrum_(is, spectrum, line_number, spectrum_number))
117  {
118  exp.addSpectrum(spectrum);
119  setProgress(is.tellg());
120  ++spectrum_number;
121  } // next spectrum
122 
123  endProgress();
124  }
125 
133  std::pair<String, String> getHTTPPeakListEnclosure(const String& filename) const;
134 
136  void writeSpectrum(std::ostream& os, const PeakSpectrum& spec, const String& filename, const String& native_id_type_accession);
137 
138 protected:
139 
142 
144  std::map<String, String> mod_group_map_;
145 
147  void writeParameterHeader_(const String& name, std::ostream& os);
148 
150  void writeModifications_(const std::vector<String>& mods, std::ostream& os,
151  bool variable_mods = false);
152 
154  void writeHeader_(std::ostream& os);
155 
157  void writeMSExperiment_(std::ostream& os, const String& filename, const PeakMap& experiment);
158 
160  template <typename SpectrumType>
161  bool getNextSpectrum_(std::ifstream& is, SpectrumType& spectrum, Size& line_number, const Size& spectrum_number)
162  {
163  spectrum.resize(0);
164  spectrum.setNativeID(String("index=") + (spectrum_number));
165 
166  if (spectrum.metaValueExists("TITLE"))
167  {
168  spectrum.removeMetaValue("TITLE");
169  }
170  typename SpectrumType::PeakType p;
171 
172  String line;
173  // seek to next peak list block
174  while (getline(is, line, '\n'))
175  {
176  ++line_number;
177 
178  line.trim(); // remove whitespaces, line-endings etc
179 
180  // found peak list block?
181  if (line == "BEGIN IONS")
182  {
183  while (getline(is, line, '\n'))
184  {
185  ++line_number;
186  line.trim(); // remove whitespaces, line-endings etc
187 
188  if (line.empty()) continue;
189 
190  if (isdigit(line[0])) // actual data .. this comes first, since its the most common case
191  {
192  std::vector<String> split;
193  do
194  {
195  if (line.empty())
196  {
197  continue;
198  }
199 
200  line.simplify(); // merge double spaces (explicitly allowed by MGF), to prevent empty split() chunks and subsequent parse error
201  line.substitute('\t', ' '); // also accept Tab (strictly, only space(s) are allowed)
202  if (line.split(' ', split, false))
203  {
204  try
205  {
206  p.setPosition(split[0].toDouble());
207  p.setIntensity(split[1].toDouble());
208  }
209  catch (Exception::ConversionError& /*e*/)
210  {
211  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The content '" + line + "' at line #" + String(line_number) + " could not be converted to a number! Expected two (m/z int) or three (m/z int charge) numbers separated by whitespace (space or tab).", "");
212  }
213  spectrum.push_back(p);
214  }
215  else
216  {
217  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The content '" + line + "' at line #" + String(line_number) + " does not contain m/z and intensity values separated by whitespace (space or tab)!", "");
218  }
219  }
220  while (getline(is, line, '\n') && ++line_number && line.trim() != "END IONS"); // line.trim() is important here!
221 
222  if (line == "END IONS")
223  {
224  return true; // found end of spectrum
225  }
226  else
227  {
228  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, R"(Reached end of file. Found "BEGIN IONS" but not the corresponding "END IONS"!)", "");
229  }
230  }
231  else if (line.hasPrefix("PEPMASS")) // parse precursor position
232  {
233  String tmp = line.substr(8); // copy since we might need the original line for error reporting later
234  tmp.substitute('\t', ' ');
235  std::vector<String> split;
236  tmp.split(' ', split);
237  if (split.size() == 1)
238  {
239  spectrum.getPrecursors()[0].setMZ(split[0].trim().toDouble());
240  }
241  else if (split.size() == 2)
242  {
243  spectrum.getPrecursors()[0].setMZ(split[0].trim().toDouble());
244  spectrum.getPrecursors()[0].setIntensity(split[1].trim().toDouble());
245  }
246  else
247  {
248  throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Cannot parse PEPMASS in '" + line + "' at line #" + String(line_number) + " (expected 1 or 2 entries, but " + String(split.size()) + " were present)!", "");
249  }
250  }
251  else if (line.hasPrefix("CHARGE"))
252  {
253  String tmp = line.substr(7);
254  tmp.remove('+');
255  spectrum.getPrecursors()[0].setCharge(tmp.toInt());
256  }
257  else if (line.hasPrefix("RTINSECONDS"))
258  {
259  String tmp = line.substr(12);
260  spectrum.setRT(tmp.toDouble());
261  }
262  else if (line.hasPrefix("TITLE"))
263  {
264  // test if we have a line like "TITLE= Cmpd 1, +MSn(595.3), 10.9 min"
265  if (line.hasSubstring("min"))
266  {
267  try
268  {
269  std::vector<String> split;
270  line.split(',', split);
271  if (!split.empty())
272  {
273  for (Size i = 0; i != split.size(); ++i)
274  {
275  if (split[i].hasSubstring("min"))
276  {
277  std::vector<String> split2;
278  split[i].trim().split(' ', split2);
279  if (!split2.empty())
280  {
281  spectrum.setRT(split2[0].trim().toDouble() * 60.0);
282  }
283  }
284  }
285  }
286  }
287  catch (Exception::BaseException& /*e*/)
288  {
289  // just do nothing and write the whole title to spec
290  std::vector<String> split;
291  if (line.split('=', split))
292  {
293  if (!split[1].empty()) spectrum.setMetaValue("TITLE", split[1]);
294  }
295  }
296  }
297  else // just write the title as metainfo to the spectrum and add native ID to make the titles unique
298  {
299  Size firstEqual = line.find('=', 4);
300  if (firstEqual != std::string::npos)
301  {
302  if (String(spectrum.getMetaValue("TITLE")).hasSubstring(spectrum.getNativeID()))
303  {
304  spectrum.setMetaValue("TITLE", line.substr(firstEqual + 1));
305  }
306  else
307  {
308  spectrum.setMetaValue("TITLE", line.substr(firstEqual + 1) + "_" + spectrum.getNativeID());
309  }
310  }
311  }
312  }
313  else if (line.hasPrefix("NAME"))
314  {
315  String tmp = line.substr(5);
317  }
318  else if (line.hasPrefix("INCHI="))
319  {
320  String tmp = line.substr(6);
322  }
323  else if (line.hasPrefix("SMILES"))
324  {
325  String tmp = line.substr(7);
327  }
328  else if (line.hasPrefix("SPECTRUMID"))
329  {
330  String tmp = line.substr(11);
331  spectrum.setMetaValue("GNPS_Spectrum_ID", tmp);
332  }
333  else if (line.hasPrefix("SCANS="))
334  {
335  String tmp = line.substr(6);
336  spectrum.setMetaValue("Scan_ID", tmp);
337  }
338  }
339  }
340  }
341 
342  return false; // found end of file
343  }
344 
345  };
346 
347 } // namespace OpenMS
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
Exception base class.
Definition: Exception.h:91
Invalid conversion exception.
Definition: Exception.h:356
File not found exception.
Definition: Exception.h:511
Parse Error exception.
Definition: Exception.h:624
static bool exists(const String &file)
Method used to test if a file exists.
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:72
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
void reset()
Clear all internal data (spectra, ranges, metadata)
The representation of a 1D spectrum.
Definition: MSSpectrum.h:70
void setMSLevel(UInt ms_level)
Sets the MS level.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Read/write Mascot generic files (MGF).
Definition: MascotGenericFile.h:66
bool store_compact_
use a compact format for storing (no zero-intensity peaks, limited number of decimal places)?
Definition: MascotGenericFile.h:141
std::pair< String, String > getHTTPPeakListEnclosure(const String &filename) const
enclosing Strings of the peak list body for HTTP submission
void store(const String &filename, const PeakMap &experiment, bool compact=false)
stores the experiment data in a MascotGenericFile that can be used as input for MASCOT shell executio...
void writeHeader_(std::ostream &os)
writes the full header
void writeModifications_(const std::vector< String > &mods, std::ostream &os, bool variable_mods=false)
write a list of (fixed or variable) modifications
void writeParameterHeader_(const String &name, std::ostream &os)
writes a parameter header
void writeMSExperiment_(std::ostream &os, const String &filename, const PeakMap &experiment)
writes the MSExperiment
void load(const String &filename, MapType &exp)
loads a Mascot Generic File into a PeakMap
Definition: MascotGenericFile.h:94
~MascotGenericFile() override
destructor
void writeSpectrum(std::ostream &os, const PeakSpectrum &spec, const String &filename, const String &native_id_type_accession)
writes a spectrum in MGF format to an ostream
bool getNextSpectrum_(std::ifstream &is, SpectrumType &spectrum, Size &line_number, const Size &spectrum_number)
reads a spectrum block, the section between 'BEGIN IONS' and 'END IONS' of a MGF file
Definition: MascotGenericFile.h:161
void store(std::ostream &os, const String &filename, const PeakMap &experiment, bool compact=false)
store the experiment data in a MascotGenericFile; the output is written to the given stream,...
void updateMembers_() override
docu in base class
MascotGenericFile()
constructor
std::map< String, String > mod_group_map_
mapping of modifications with specificity groups, that have to be treated specially (e....
Definition: MascotGenericFile.h:144
bool metaValueExists(const String &name) const
Returns whether an entry with the given name exists.
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
const DataValue & getMetaValue(const String &name) const
Returns the value corresponding to a string, or DataValue::EMPTY if not found.
void removeMetaValue(const String &name)
Removes the DataValue corresponding to name if it exists.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:110
void setPosition(PositionType const &position)
Mutable access to the position.
Definition: Peak1D.h:149
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:53
void setType(SpectrumType type)
sets the spectrum type
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software.
void setNativeID(const String &native_id)
sets the native identifier for the spectrum, used by the acquisition software.
A more convenient string class.
Definition: String.h:60
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
bool hasPrefix(const String &string) const
true if String begins with string, false otherwise
String & simplify()
merges subsequent whitespaces to one blank character
bool hasSubstring(const String &string) const
true if String contains the string, false otherwise
String & remove(char what)
Remove all occurrences of the character what.
Int toInt() const
Conversion to Int.
double toDouble() const
Conversion to double.
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
String & trim()
removes whitespaces (space, tab, line feed, carriage return) at the beginning and the end of the stri...
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
const std::string MSM_SMILES_STRING
Definition: Constants.h:567
const std::string MSM_INCHI_STRING
Definition: Constants.h:562
const std::string MSM_METABOLITE_NAME
Definition: Constants.h:557
static String & trim(String &this_s)
Definition: StringUtilsSimple.h:229
static bool split(const String &this_s, const char splitter, std::vector< String > &substrings, bool quote_protect)
Definition: StringUtilsSimple.h:365
static bool hasSubstring(const String &this_s, const String &string)
Definition: StringUtilsSimple.h:137
static double toDouble(const String &this_s)
Definition: StringUtils.h:242
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48