OpenMS
ProteinResolver.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: David Wojnar $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
43 
44 namespace OpenMS
45 {
54  class OPENMS_DLLAPI ProteinResolver :
55  public DefaultParamHandler
56  {
57 
58 public:
59 
60  //default constructor
62 
63  //copy constructor
65 
66  //assignment operator
68 
69  //destructor
70  ~ProteinResolver() override; // destructor (declaration only); `override` means the base class DefaultParamHandler declares a virtual destructor
71 
72 
73  struct ProteinEntry;
74  struct PeptideEntry;
75  struct ISDGroup;
76  struct MSDGroup;
77  struct ResolverResult;
78 
80  struct ProteinEntry // represents a protein from a FASTA file; pointed to by PeptideEntry, MSDGroup and ISDGroup
81  { // NOTE(review): listing lines 84, 90 and 93 (fasta_entry, index, number_of_experimental_peptides per the member index of this page) are absent from this rendering
82  std::list<PeptideEntry *> peptides; // raw pointers to peptide nodes; presumably the peptides belonging to this protein -- TODO confirm
83  bool traversed; // traversal marker; presumably used by traverseProtein_ while building MSD groups -- TODO confirm
85  enum type {primary, secondary, primary_indistinguishable, secondary_indistinguishable} protein_type; // classification of this protein
86  double weight; // monoisotopic weight
87  float coverage; // coverage in percent
88  // if the protein is indistinguishable, all proteins it cannot be distinguished from are kept in the list `indis`
89  std::list<ProteinEntry *> indis;
91  Size msd_group; // index of the MSD group; presumably into a std::vector<MSDGroup> -- TODO confirm
92  Size isd_group; // index of the ISD group; presumably into a std::vector<ISDGroup> -- TODO confirm
94  };
95 
97  struct PeptideEntry // represents a peptide: first in silico; if `experimental` is set to true it is MS/MS derived
98  { // NOTE(review): listing lines 101-104, 107 and 109 (sequence, peptide_identification, peptide_hit, index, experimental, origin per the member index of this page) are absent from this rendering
99  std::list<ProteinEntry *> proteins; // raw pointers to protein nodes; presumably the proteins containing this peptide -- TODO confirm
100  bool traversed; // traversal marker; presumably used by traversePeptide_ while building MSD groups -- TODO confirm
105  Size msd_group; // index of the MSD group; presumably into a std::vector<MSDGroup> -- TODO confirm
106  Size isd_group; // index of the ISD group; presumably into a std::vector<ISDGroup> -- TODO confirm
108  float intensity; // intensity value of this peptide; source and unit are not visible here -- TODO confirm
110  };
111 
113  struct MSDGroup // representation of an MSD group: contains peptides, proteins and a pointer to its ISD group
114  { // NOTE(review): listing lines 118-121 (isd_group, number_of_decoy, number_of_target, number_of_target_plus_decoy per the member index of this page) are absent from this rendering
115  std::list<ProteinEntry *> proteins; // raw pointers to the protein nodes of this group
116  std::list<PeptideEntry *> peptides; // raw pointers to the peptide nodes of this group
122  float intensity; // group intensity; presumably filled by computeIntensityOfMSD_ -- TODO confirm
123  };
124 
125  struct ISDGroup // ISD group: holds protein and peptide nodes plus a list of MSD group references
126  {
127  std::list<ProteinEntry *> proteins; // raw pointers to the protein nodes of this group
128  std::list<PeptideEntry *> peptides; // raw pointers to the peptide nodes of this group
130  std::list<Size> msd_groups; // presumably indices of the MSD groups that belong to this ISD group -- TODO confirm
131  };
132 
134  { // NOTE(review): presumably the body of the forward-declared `struct ResolverResult`; its opening line (listing line 133) and members on lines 135 and 144 (identifier, consensus_map per the member index of this page) are absent from this rendering
136  std::vector<ISDGroup> * isds; // raw pointer to the ISD groups; ownership is not visible here -- TODO confirm
137  std::vector<MSDGroup> * msds; // raw pointer to the MSD groups; ownership is not visible here -- TODO confirm
138  std::vector<ProteinEntry> * protein_entries; // raw pointer to the protein nodes
139  std::vector<PeptideEntry> * peptide_entries; // raw pointer to the peptide nodes
140  std::vector<Size> * reindexed_peptides; // raw pointer to the reindexed peptide indices (see reindexingNodes_)
141  std::vector<Size> * reindexed_proteins; // raw pointer to the reindexed protein indices (see reindexingNodes_)
142  enum type {PeptideIdent, Consensus} input_type; // kind of input; presumably selects between peptide_identification and a consensus map -- TODO confirm
143  std::vector<PeptideIdentification> * peptide_identification; // raw pointer to peptide identifications; presumably set when input_type is PeptideIdent -- TODO confirm
145  };
146 
154  void resolveConsensus(ConsensusMap & consensus); // computes protein groups from a consensus map
155 
163  void resolveID(std::vector<PeptideIdentification> & peptide_identifications); // computes protein groups from peptide identifications
164 
165  // /**
166  // @brief NOT IMPLEMENTED YET
167 
168  // @param protein_nodes
169  // @param peptide_nodes
170  // @param reindexed_proteins
171  // @param reindexed_peptides
172  // @param peptide_identifications
173  // @param output
174  // */
175  // void writeProteinsAndPeptidesmzTab(std::vector<ProteinEntry>& protein_nodes, std::vector<PeptideEntry>& peptide_nodes, std::vector<Size>& reindexed_proteins, std::vector<Size>& reindexed_peptides, std::vector<PeptideIdentification>& peptide_identifications, String& output );
176  // /**
177  // @brief Writing peptide table into text file
178 
179  // @param peptides
180  // @param reindexed_peptides
181  // @param identifications
182  // @param output_file
183  // */
184  // void writePeptideTable(std::vector<PeptideEntry> & peptides, std::vector<Size> & reindexed_peptides, std::vector<PeptideIdentification> & identifications, String & output_file); // not implemented
185  // /**
186  // @brief Writing peptide table into text file
187 
188  // @param peptides
189  // @param reindexed_peptides
190  // @param consensus
191  // @param output
192  // */
193  // void writePeptideTable(std::vector<PeptideEntry> & peptides, std::vector<Size> & reindexed_peptides, ConsensusMap & consensus, String & output_file); // not implemented
194  // /**
195  // @brief Writing protein table into text file
196 
197  // @param proteins
198  // @param reindexed_proteins
199  // @param output_file
200  // */
201  // void writeProteinTable(std::vector<ProteinEntry> & proteins, std::vector<Size> & reindexed_proteins, String & output_file); // not implemented
202  // /**
203  // @brief Writing protein groups into text file
204 
205  // @param isd_groups ISD groups
206  // @param msd_groups MSD groups
207  // @param output_file Path of output file
208  // */
209  // void writeProteinGroups(std::vector<ISDGroup> & isd_groups, std::vector<MSDGroup> & msd_groups, String & output_file); // not implemented
210 
217  void countTargetDecoy(std::vector<MSDGroup> & msd_groups, ConsensusMap & consensus); // presumably counts target/decoy hits per MSD group using a consensus map (original description is only a placeholder) -- TODO confirm
218 
225  void countTargetDecoy(std::vector<MSDGroup> & msd_groups, std::vector<PeptideIdentification> & peptide_nodes); // same as above for peptide identifications -- TODO confirm
226 
227  void clearResult(); // presumably clears resolver_result_ -- TODO confirm against the implementation
228 
229  void setProteinData(std::vector<FASTAFile::FASTAEntry> & protein_data); // presumably stores the FASTA entries in protein_data_ -- TODO confirm
230 
231  const std::vector<ResolverResult> & getResults(); // returns a const reference to a vector of results; presumably resolver_result_ -- TODO confirm
232 
234  static const PeptideIdentification & getPeptideIdentification(const ConsensusMap & consensus, const PeptideEntry * peptide); // overloaded lookup: const reference to the PeptideIdentification for `peptide`, taken from a consensus map
235  static const PeptideHit & getPeptideHit(const ConsensusMap & consensus, const PeptideEntry * peptide); // overloaded lookup: const reference to the PeptideHit for `peptide`, taken from a consensus map
236  static const PeptideIdentification & getPeptideIdentification(const std::vector<PeptideIdentification> & peptide_nodes, const PeptideEntry * peptide); // overloaded lookup: same as above, taken from a vector of peptide identifications
237  static const PeptideHit & getPeptideHit(const std::vector<PeptideIdentification> & peptide_nodes, const PeptideEntry * peptide); // overloaded lookup: same as above, taken from a vector of peptide identifications
238 
239 private:
240 
241  std::vector<ResolverResult> resolver_result_; // results held by value; presumably what getResults() returns -- TODO confirm
242  std::vector<FASTAFile::FASTAEntry> protein_data_; // FASTA entries; presumably set via setProteinData() -- TODO confirm
243 
244  void computeIntensityOfMSD_(std::vector<MSDGroup> & msd_groups); // presumably fills MSDGroup::intensity -- TODO confirm
245 
247  void traverseProtein_(ProteinEntry * prot_node, MSDGroup & group); // traverses protein and peptide nodes for building MSD groups
248  void traversePeptide_(PeptideEntry * pep_node, MSDGroup & group); // peptide-side counterpart of traverseProtein_
250  Size findPeptideEntry_(String seq, std::vector<PeptideEntry> & nodes); // searches the given sequence in all nodes; returns its index or nodes.size() if not found
252  Size binarySearchNodes_(String & seq, std::vector<PeptideEntry> & nodes, Size start, Size end); // helper function for findPeptideEntry_
254  Size includeMSMSPeptides_(std::vector<PeptideIdentification> & peptide_identifications, std::vector<PeptideEntry> & peptide_nodes); // includes all MS/MS derived peptides into the graph (peptide identification / idXML input)
257  Size includeMSMSPeptides_(ConsensusMap & consensus, std::vector<PeptideEntry> & peptide_nodes); // presumably the consensus map counterpart of the overload above -- TODO confirm
259  void reindexingNodes_(std::vector<MSDGroup> & msd_groups, std::vector<Size> & reindexed_proteins, std::vector<Size> & reindexed_peptides); // proteins and peptides get reindexed, based on whether they belong to MSD groups or not
261  void primaryProteins_(std::vector<PeptideEntry> & peptide_nodes, std::vector<Size> & reindexed_peptides); // marks proteins which have a unique peptide as primary; uses the reindexed vector
262  void buildingMSDGroups_(std::vector<MSDGroup> & msd_groups, std::vector<ISDGroup> & isd_groups); // builds the MSD groups; presumably from the given ISD groups -- TODO confirm
263  void buildingISDGroups_(std::vector<ProteinEntry> & protein_nodes, std::vector<PeptideEntry> & peptide_nodes,
264  std::vector<ISDGroup> & isd_groups); // builds the ISD groups; presumably from the protein and peptide nodes -- TODO confirm
265  // disabled/buggy
266  //ProteinResolver::indistinguishableProteins(vector<MSDGroup>& msd_groups);
267 
268  }; // class
269 
270 } // namespace
271 
A container for consensus elements.
Definition: ConsensusMap.h:92
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
Representation of a peptide hit.
Definition: PeptideHit.h:57
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
Helper class for peptide and protein quantification based on feature data annotated with IDs.
Definition: ProteinResolver.h:56
static const PeptideHit & getPeptideHit(const std::vector< PeptideIdentification > &peptide_nodes, const PeptideEntry *peptide)
Size peptide_hit
Definition: ProteinResolver.h:103
float intensity
Definition: ProteinResolver.h:108
void resolveID(std::vector< PeptideIdentification > &peptide_identifications)
Computes protein groups from peptide identifications or a consensus map.
String sequence
Definition: ProteinResolver.h:101
std::list< Size > msd_groups
Definition: ProteinResolver.h:130
std::vector< ResolverResult > resolver_result_
Definition: ProteinResolver.h:241
void buildingISDGroups_(std::vector< ProteinEntry > &protein_nodes, std::vector< PeptideEntry > &peptide_nodes, std::vector< ISDGroup > &isd_groups)
void countTargetDecoy(std::vector< MSDGroup > &msd_groups, std::vector< PeptideIdentification > &peptide_nodes)
No description available (the source documentation contains only the placeholder "brief").
bool traversed
Definition: ProteinResolver.h:100
std::list< PeptideEntry * > peptides
Definition: ProteinResolver.h:116
void resolveConsensus(ConsensusMap &consensus)
Computes protein groups from peptide identifications or a consensus map.
Size number_of_decoy
Definition: ProteinResolver.h:119
Size includeMSMSPeptides_(std::vector< PeptideIdentification > &peptide_identifications, std::vector< PeptideEntry > &peptide_nodes)
Includes all MS/MS-derived peptides in the graph (idXML input).
String origin
Definition: ProteinResolver.h:109
void traversePeptide_(PeptideEntry *pep_node, MSDGroup &group)
bool experimental
Definition: ProteinResolver.h:107
void primaryProteins_(std::vector< PeptideEntry > &peptide_nodes, std::vector< Size > &reindexed_peptides)
marks Proteins which have a unique peptide as primary. Uses reindexed vector, thus reindexingNodes ha...
Size number_of_target_plus_decoy
Definition: ProteinResolver.h:121
static const PeptideIdentification & getPeptideIdentification(const std::vector< PeptideIdentification > &peptide_nodes, const PeptideEntry *peptide)
const std::vector< ResolverResult > & getResults()
ISDGroup * isd_group
Definition: ProteinResolver.h:118
Size number_of_target
Definition: ProteinResolver.h:120
Size includeMSMSPeptides_(ConsensusMap &consensus, std::vector< PeptideEntry > &peptide_nodes)
void traverseProtein_(ProteinEntry *prot_node, MSDGroup &group)
traverse protein and peptide nodes for building MSD groups
void buildingMSDGroups_(std::vector< MSDGroup > &msd_groups, std::vector< ISDGroup > &isd_groups)
ProteinResolver(const ProteinResolver &rhs)
static const PeptideIdentification & getPeptideIdentification(const ConsensusMap &consensus, const PeptideEntry *peptide)
overloaded functions – return a const reference to a PeptideIdentification object or a peptideHit eit...
Size msd_group
Definition: ProteinResolver.h:105
Size findPeptideEntry_(String seq, std::vector< PeptideEntry > &nodes)
searches given sequence in all nodes and returns its index or nodes.size() if not found.
void countTargetDecoy(std::vector< MSDGroup > &msd_groups, ConsensusMap &consensus)
No description available (the source documentation contains only the placeholder "brief").
std::list< ProteinEntry * > proteins
Definition: ProteinResolver.h:99
Size binarySearchNodes_(String &seq, std::vector< PeptideEntry > &nodes, Size start, Size end)
helper function for findPeptideEntry.
void setProteinData(std::vector< FASTAFile::FASTAEntry > &protein_data)
std::vector< FASTAFile::FASTAEntry > protein_data_
Definition: ProteinResolver.h:242
static const PeptideHit & getPeptideHit(const ConsensusMap &consensus, const PeptideEntry *peptide)
void reindexingNodes_(std::vector< MSDGroup > &msd_groups, std::vector< Size > &reindexed_proteins, std::vector< Size > &reindexed_peptides)
Proteins and Peptides get reindexed, based on whether they belong to msd groups or not....
Size peptide_identification
Definition: ProteinResolver.h:102
Size index
Definition: ProteinResolver.h:104
ProteinResolver & operator=(const ProteinResolver &rhs)
void computeIntensityOfMSD_(std::vector< MSDGroup > &msd_groups)
Size isd_group
Definition: ProteinResolver.h:106
Definition: ProteinResolver.h:126
Representation of an MSD group. Contains peptides, proteins and a pointer to its ISD group.
Definition: ProteinResolver.h:114
Represents a peptide. Peptides are first generated in silico; if `experimental` is set to true, the peptide is MS/MS derived.
Definition: ProteinResolver.h:98
A more convenient string class.
Definition: String.h:60
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:72
Represents a protein from a FASTA file.
Definition: ProteinResolver.h:81
float coverage
Definition: ProteinResolver.h:87
std::list< ProteinEntry * > indis
Definition: ProteinResolver.h:89
bool traversed
Definition: ProteinResolver.h:83
std::list< PeptideEntry * > peptides
Definition: ProteinResolver.h:82
type
Definition: ProteinResolver.h:85
@ primary
Definition: ProteinResolver.h:85
double weight
Definition: ProteinResolver.h:86
Size number_of_experimental_peptides
Definition: ProteinResolver.h:93
Size msd_group
Definition: ProteinResolver.h:91
FASTAFile::FASTAEntry * fasta_entry
Definition: ProteinResolver.h:84
Size index
Definition: ProteinResolver.h:90
Size isd_group
Definition: ProteinResolver.h:92
Definition: ProteinResolver.h:134
std::vector< MSDGroup > * msds
Definition: ProteinResolver.h:137
std::vector< ISDGroup > * isds
Definition: ProteinResolver.h:136
std::vector< PeptideIdentification > * peptide_identification
Definition: ProteinResolver.h:143
type
Definition: ProteinResolver.h:142
std::vector< Size > * reindexed_proteins
Definition: ProteinResolver.h:141
std::vector< PeptideEntry > * peptide_entries
Definition: ProteinResolver.h:139
std::vector< Size > * reindexed_peptides
Definition: ProteinResolver.h:140
std::vector< ProteinEntry > * protein_entries
Definition: ProteinResolver.h:138
ConsensusMap * consensus_map
Definition: ProteinResolver.h:144
String identifier
Definition: ProteinResolver.h:135