OpenMS
XQuestResultXMLHandler.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Eugen Netz $
32 // $Authors: Lukas Zimmermann $
33 // --------------------------------------------------------------------------
34 #pragma once
35 
42 
43 namespace OpenMS
44 {
45  namespace Internal
46  {
/// XMLHandler for the result files of XQuest.
///
/// Declares one constructor for reading a result file into identification
/// structures and one for writing existing identification structures out.
/// NOTE(review): the embedded listing line numbers below have gaps, so some
/// member declarations of the real header are not shown in this listing.
49  class OPENMS_DLLAPI XQuestResultXMLHandler :
50  public XMLHandler
51  {
52  public:
53 
    /// Maps enzyme_num in xQuest result file to the enzyme name used by OpenMS.
55  static std::map< Size, String > enzymes;
56 
    /// Maps String encoding month to the numeric value.
58  static std::map<String, UInt> months;
59 
    /// Constructor for a read-only handler for internal identification structures.
    /// Takes the file name plus mutable references to the peptide and protein
    /// identification vectors.
61  XQuestResultXMLHandler(const String & filename,
62  std::vector< PeptideIdentification > & pep_ids,
63  std::vector< ProteinIdentification > & prot_ids
64  );
65 
    /// Constructor for a write-only handler for internal identification structures.
    /// Takes const references to the protein and peptide identifications,
    /// the file name and a version string.
67  XQuestResultXMLHandler(const std::vector<ProteinIdentification>& pro_id,
68  const std::vector<PeptideIdentification>& pep_id,
69  const String& filename,
70  const String& version
71  );
72 
74 
75  // Docu in base class
76  void endElement(const XMLCh * const uri, const XMLCh * const local_name, const XMLCh * const qname) override;
77 
78  // Docu in base class
79  void startElement(const XMLCh * const uri, const XMLCh * const local_name, const XMLCh * const qname, const xercesc::Attributes & attributes) override;
80 
    /// Returns the minimum score encountered in the file.
85  double getMinScore() const;
86 
    /// Returns the maximum score encountered in the file.
91  double getMaxScore() const;
92 
98 
    /// Writes the contents to a stream.
99  //Docu in base class
100  void writeTo(std::ostream& os) override;
101 
102  // TODO move these to StringUtils?
    /// Splits the input string at the nth occurrence of the separator.
111  static StringList splitByNth(const String& input, const char separator, const Size n);
112 
    /// Counts occurrences of the separator and splits the input string into
    /// two at the middle.
125  static StringList splitByMiddle(const String& input, const char separator);
126 
127  private:
128 
129 
130  // Decoy string used by xQuest, initialize to a default value
131  String decoy_string_ = "decoy_";
135 
136  // Main data structures that are populated during loading the file
137  std::vector< PeptideIdentification >* pep_ids_;
138  std::vector< ProteinIdentification >* prot_ids_;
139 
140  // internal ID items for writing files
141  const std::vector<ProteinIdentification>* cpro_id_;
142  const std::vector<PeptideIdentification>* cpep_id_;
143 
145 
146  // Keeps track of the minscore and maxscore encountered
147  double min_score_;
148  double max_score_;
149 
152 
    /// Set of all protein accessions that are within the ProteinHits.
154  std::set< String > accessions_;
155 
158 
    /// Keeps track of the charges of the hits.
160  std::set< UInt > charges_;
163 
164  // Current Retention time of spectrum pair
165  double rt_light_;
166  double rt_heavy_;
167 
168  // Current experimental m/z of spectrum pair
169  double mz_light_;
170  double mz_heavy_;
171 
172  // primary MS run path
175 
    /// The current spectrum search.
177  std::vector< PeptideIdentification > current_spectrum_search_;
178 
    /// Stores the attributes of a record (peptide identification).
180  std::map<String, DataValue> peptide_id_meta_values_;
181 
    /// Extracts the DateTime from the datetime string from xQuest;
    /// the result is passed back through the date_time reference parameter.
187  inline void extractDateTime_(const String & xquest_datetime_string, DateTime & date_time) const;
188 
    /// Assigns all meta values stored in peptide_id_meta_values_ to a meta info interface.
195  void addMetaValues_(MetaInfoInterface & meta_info_interface);
196 
    /// Gets the link location of an xQuest xlinkPositionString;
    /// the result is passed back through the pair reference parameter.
202  void getLinkPosition_(const xercesc::Attributes & attributes, std::pair<SignedSize, SignedSize> & pair);
203 
    /// Sets the peptide evidence for Alpha and Beta.
209  void setPeptideEvidence_(const String & prot_string, PeptideHit & pep_hit);
210 
211  };
212  } // namespace Internal
213 } // namespace OpenMS
DateTime Class.
Definition: DateTime.h:59
Base class for XML handlers.
Definition: XMLHandler.h:326
XMLHandler for the result files of XQuest.
Definition: XQuestResultXMLHandler.h:51
double getMinScore() const
Returns the minimum score encountered in the file.
XQuestResultXMLHandler(const std::vector< ProteinIdentification > &pro_id, const std::vector< PeptideIdentification > &pep_id, const String &filename, const String &version)
Constructor for a write-only handler for internal identification structures.
void endElement(const XMLCh *const uri, const XMLCh *const local_name, const XMLCh *const qname) override
double max_score_
Definition: XQuestResultXMLHandler.h:148
void writeTo(std::ostream &os) override
Writes the contents to a stream.
static std::map< Size, String > enzymes
Maps enzyme_num in xQuest result file to the enzyme name used by OpenMS.
Definition: XQuestResultXMLHandler.h:55
UInt getNumberOfHits() const
Returns the total number of hits in the file.
const std::vector< ProteinIdentification > * cpro_id_
Definition: XQuestResultXMLHandler.h:141
ProteaseDB * enzymes_db_
The enzyme database for enzyme lookup.
Definition: XQuestResultXMLHandler.h:157
void addMetaValues_(MetaInfoInterface &meta_info_interface)
Assigns all meta values stored in the peptide_id_meta_values_ member to a meta info interface.
UInt n_hits_
Total no. of hits found in the result XML file.
Definition: XQuestResultXMLHandler.h:144
std::set< String > accessions_
Set of all protein accessions that are within the ProteinHits.
Definition: XQuestResultXMLHandler.h:154
std::map< String, DataValue > peptide_id_meta_values_
Stores the attributes of a record (peptide identification)
Definition: XQuestResultXMLHandler.h:180
std::set< UInt > charges_
Keeps track of the charges of the hits.
Definition: XQuestResultXMLHandler.h:160
void getLinkPosition_(const xercesc::Attributes &attributes, std::pair< SignedSize, SignedSize > &pair)
Gets the link location of an xQuest xlinkPositionString.
void setPeptideEvidence_(const String &prot_string, PeptideHit &pep_hit)
Sets the peptide evidence for Alpha and Beta.
double min_score_
Definition: XQuestResultXMLHandler.h:147
static StringList splitByMiddle(const String &input, const char separator)
Counts occurrences of the separator and splits the input string into two at the middle.
void extractDateTime_(const String &xquest_datetime_string, DateTime &date_time) const
Extracts the DateTime from datetime string from xQuest.
std::vector< ProteinIdentification > * prot_ids_
Definition: XQuestResultXMLHandler.h:138
String cross_linker_name_
Definition: XQuestResultXMLHandler.h:134
double mz_heavy_
Definition: XQuestResultXMLHandler.h:170
double rt_heavy_
Definition: XQuestResultXMLHandler.h:166
String spectrum_input_file_
Definition: XQuestResultXMLHandler.h:174
StringList ms_run_path_
Definition: XQuestResultXMLHandler.h:173
static StringList splitByNth(const String &input, const char separator, const Size n)
Splits the input string at the nth occurrence of the separator.
const std::vector< PeptideIdentification > * cpep_id_
Definition: XQuestResultXMLHandler.h:142
double mz_light_
Definition: XQuestResultXMLHandler.h:169
std::vector< PeptideIdentification > current_spectrum_search_
The current spectrum search.
Definition: XQuestResultXMLHandler.h:177
UInt min_precursor_charge_
Definition: XQuestResultXMLHandler.h:161
UInt max_precursor_charge_
Definition: XQuestResultXMLHandler.h:162
int spectrum_index_light_
Definition: XQuestResultXMLHandler.h:132
XQuestResultXMLHandler(const String &filename, std::vector< PeptideIdentification > &pep_ids, std::vector< ProteinIdentification > &prot_ids)
Constructor for a read-only handler for internal identification structures.
static std::map< String, UInt > months
Maps String encoding month to the numeric value.
Definition: XQuestResultXMLHandler.h:58
bool is_openpepxl_
Whether or not current xquest result tag comes from OpenPepXL (xQuest otherwise)
Definition: XQuestResultXMLHandler.h:151
void startElement(const XMLCh *const uri, const XMLCh *const local_name, const XMLCh *const qname, const xercesc::Attributes &attributes) override
std::vector< PeptideIdentification > * pep_ids_
Definition: XQuestResultXMLHandler.h:137
double rt_light_
Definition: XQuestResultXMLHandler.h:165
int spectrum_index_heavy_
Definition: XQuestResultXMLHandler.h:133
double getMaxScore() const
Returns the maximum score encountered in the file.
Interface for classes that can store arbitrary meta information (Type-Name-Value tuples).
Definition: MetaInfoInterface.h:61
Representation of a peptide hit.
Definition: PeptideHit.h:57
Database for enzymes that digest proteins (proteases)
Definition: ProteaseDB.h:53
A more convenient string class.
Definition: String.h:60
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48