OpenMS
XTandemXMLFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Andreas Bertsch $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
38 #include <OpenMS/FORMAT/XMLFile.h>
41 #include <stack>
42 
43 namespace OpenMS
44 {
45  class String;
46  class ProteinIdentification;
47 
56  class OPENMS_DLLAPI XTandemXMLFile : // used to load XTandemXML files
57  protected Internal::XMLHandler, // base class for XML handlers
58  public Internal::XMLFile // base class for loading/storing XML files whose handler derives from XMLHandler
59  {
60 public:
61 
64 
66  ~XTandemXMLFile() override; // destructor
80  void load(const String& filename, ProteinIdentification& protein_identification, std::vector<PeptideIdentification>& id_data, ModificationDefinitionsSet& mod_def_set); // loads data from an X! Tandem XML file
81 
82 
83 protected:
84 
85  // Documented in the base class (overridden here)
86  void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override;
87 
88  // Documented in the base class (overridden here)
89  void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override;
90 
91  // Documented in the base class (overridden here)
92  void characters(const XMLCh* const chars, const XMLSize_t /*length*/) override;
93 
95 
97 
98 private:
99 
101 
102  // is_protein_note_ (bool): true during "note" element containing protein accession
104 
105  // is_spectrum_note_ (bool): true during "note" element containing spectrum ID
107 
108  // skip_protein_acc_update_ (bool): true after non-new protein entries, so that with the next "protein note" the
109  // accession will not be updated again
111 
112  // peptide hits per spectrum
113  std::map<UInt, std::vector<PeptideHit> > peptide_hits_;
114 
115  // protein hits
116  std::vector<ProteinHit> protein_hits_;
117 
118  // protein unique IDs (assigned by X! Tandem), to keep track of which proteins were already seen
119  std::set<UInt> protein_uids_;
120 
121  // current_protein_ (String): accession of the current protein
123 
124  // current_charge_ (Int): charge of current peptide
126 
127  // current_id_ (UInt): X! Tandem ID of current peptide
129 
130  // tag_ (String): tag -- NOTE(review): presumably the name of the XML element currently being processed; confirm in the implementation
132 
133  // current_start_ (UInt): start position of current peptide in protein sequence
135 
136  // current_stop_ (UInt): stop position of current peptide in protein sequence
138 
139  // previous_seq_ (String): previous peptide sequence
141 
142  // mapping from X! Tandem ID to spectrum ID
143  std::map<UInt, String> spectrum_ids_;
144 
145  // mod_def_set_ (ModificationDefinitionsSet): modification definitions
147 
148  // default_nterm_mods_ (ModificationDefinitionsSet): modifications used by X! Tandem by default
150 
151  // the possible type attributes of the group tag elements
152  enum class GroupType
153  {
154  MODEL,
155  PARAMETERS,
156  SUPPORT
157  };
158 
159  // stack of types of the group elements
160  // they can be nested (e.g. a support group in a model group)
161  // parsing of child elements sometimes depends on the group type
162  std::stack<GroupType> group_type_stack_;
163 
164  };
165 
166 } // namespace OpenMS
167 
Base class for loading/storing XML files that have a handler derived from XMLHandler.
Definition: XMLFile.h:49
Base class for XML handlers.
Definition: XMLHandler.h:326
Representation of a set of modification definitions.
Definition: ModificationDefinitionsSet.h:59
Representation of a protein identification run.
Definition: ProteinIdentification.h:76
A more convenient string class.
Definition: String.h:60
Used to load XTandemXML files.
Definition: XTandemXMLFile.h:59
String current_protein_
Definition: XTandemXMLFile.h:122
ProteinIdentification * protein_identification_
Definition: XTandemXMLFile.h:100
bool is_spectrum_note_
Definition: XTandemXMLFile.h:106
std::map< UInt, std::vector< PeptideHit > > peptide_hits_
Definition: XTandemXMLFile.h:113
GroupType
Definition: XTandemXMLFile.h:153
ModificationDefinitionsSet default_nterm_mods_
Definition: XTandemXMLFile.h:149
String tag_
Definition: XTandemXMLFile.h:131
XTandemXMLFile & operator=(const XTandemXMLFile &rhs)
Int current_charge_
Definition: XTandemXMLFile.h:125
std::vector< ProteinHit > protein_hits_
Definition: XTandemXMLFile.h:116
ModificationDefinitionsSet mod_def_set_
Definition: XTandemXMLFile.h:146
std::stack< GroupType > group_type_stack_
Definition: XTandemXMLFile.h:162
bool skip_protein_acc_update_
Definition: XTandemXMLFile.h:110
std::set< UInt > protein_uids_
Definition: XTandemXMLFile.h:119
UInt current_id_
Definition: XTandemXMLFile.h:128
void startElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname, const xercesc::Attributes &attributes) override
~XTandemXMLFile() override
Destructor.
void characters(const XMLCh *const chars, const XMLSize_t) override
std::map< UInt, String > spectrum_ids_
Definition: XTandemXMLFile.h:143
void endElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname) override
UInt current_stop_
Definition: XTandemXMLFile.h:137
UInt current_start_
Definition: XTandemXMLFile.h:134
bool is_protein_note_
Definition: XTandemXMLFile.h:103
String previous_seq_
Definition: XTandemXMLFile.h:140
XTandemXMLFile(const XTandemXMLFile &rhs)
XTandemXMLFile()
Default constructor.
int Int
Signed integer type.
Definition: Types.h:102
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
void load(const String &filename, ProteinIdentification &protein_identification, std::vector< PeptideIdentification > &id_data, ModificationDefinitionsSet &mod_def_set)
Loads data from an X! Tandem XML file.
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48