OpenMS
IDScoreSwitcherAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Julianus Pfeuffer $
32 // $Authors: Julianus Pfeuffer $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
41 
42 #include <vector>
43 #include <set>
44 
45 namespace OpenMS
46 {
47 
48  class OPENMS_DLLAPI IDScoreSwitcherAlgorithm:
49  public DefaultParamHandler
50  {
51  public:
53 
58  enum class ScoreType
59  {
60  RAW,
61  RAW_EVAL,
62  PP,
63  PEP,
64  FDR,
65  QVAL,
66  };
67 
69  bool isScoreType(const String& score_name, const ScoreType& type)
70  {
71  const std::set<String>& possible_types = type_to_str_[type];
72  return possible_types.find(score_name) != possible_types.end();
73  }
74 
80  template <typename IDType>
81  void switchScores(IDType& id, Size& counter)
82  {
83  for (auto hit_it = id.getHits().begin();
84  hit_it != id.getHits().end(); ++hit_it, ++counter)
85  {
86  if (!hit_it->metaValueExists(new_score_))
87  {
88  std::stringstream msg;
89  msg << "Meta value '" << new_score_ << "' not found for " << *hit_it;
90  throw Exception::MissingInformation(__FILE__, __LINE__,
91  OPENMS_PRETTY_FUNCTION, msg.str());
92  }
93 
94  const String& old_score_meta = (old_score_.empty() ? id.getScoreType() :
95  old_score_);
96  const DataValue& dv = hit_it->getMetaValue(old_score_meta);
97  if (!dv.isEmpty()) // meta value for old score already exists
98  {
99  // TODO: find a better way to check if old score type is something different (even if it has same name)
100  // This currently, is a workaround for e.g., having Percolator_qvalue as meta value and same q-value as main score (getScore()).
101  if (fabs((double(dv) - hit_it->getScore()) * 2.0 /
102  (double(dv) + hit_it->getScore())) > tolerance_)
103  {
104  hit_it->setMetaValue(old_score_meta + "~", hit_it->getScore());
105  }
106  }
107  else
108  {
109  hit_it->setMetaValue(old_score_meta, hit_it->getScore());
110  }
111  hit_it->setScore(hit_it->getMetaValue(new_score_));
112  }
113  id.setScoreType(new_score_type_);
114  id.setHigherScoreBetter(higher_better_);
115  }
116 
120  void switchToGeneralScoreType(std::vector<PeptideIdentification>& id, ScoreType type, Size& counter)
121  {
122  if (id.empty()) return;
123  String t = findScoreType(id[0], type);
124  if (t.empty())
125  {
126  String msg = "First encountered ID does not have the requested score type.";
127  throw Exception::MissingInformation(__FILE__, __LINE__,
128  OPENMS_PRETTY_FUNCTION, msg);
129  }
130  else if (t == id[0].getScoreType())
131  {
132  // we assume that all the other peptide ids
133  // also already have the correct score set
134  return;
135  }
136 
137  if (t.hasSuffix("_score"))
138  {
139  new_score_type_ = t.chop(6);
140  }
141  else
142  {
143  new_score_type_ = t;
144  }
145  new_score_ = t;
146 
147  if (type != ScoreType::RAW && higher_better_ != type_to_better_[type])
148  {
149  OPENMS_LOG_WARN << "Requested non-raw score type does not match the expected score direction. Correcting!\n";
150  higher_better_ = type_to_better_[type];
151  }
152  for (auto& i : id)
153  {
154  switchScores(i, counter);
155  }
156  }
157 
161  void switchToGeneralScoreType(ConsensusMap& cmap, ScoreType type, Size& counter, bool unassigned_peptides_too = true)
162  {
163  String new_type = "";
164  for (const auto& f : cmap)
165  {
166  const auto& ids = f.getPeptideIdentifications();
167  if (!ids.empty())
168  {
169  new_type = findScoreType(ids[0], type);
170  if (new_type == ids[0].getScoreType())
171  {
172  return;
173  }
174  else
175  {
176  break;
177  }
178  }
179  }
180 
181  if (new_type.empty())
182  {
183  String msg = "First encountered ID does not have the requested score type.";
184  throw Exception::MissingInformation(__FILE__, __LINE__,
185  OPENMS_PRETTY_FUNCTION, msg);
186  }
187 
188  if (new_type.hasSuffix("_score"))
189  {
190  new_score_type_ = new_type.chop(6);
191  }
192  else
193  {
194  new_score_type_ = new_type;
195  }
196  new_score_ = new_type;
197 
198  if (type != ScoreType::RAW && higher_better_ != type_to_better_[type])
199  {
200  OPENMS_LOG_WARN << "Requested non-raw score type does not match the expected score direction. Correcting!\n";
201  higher_better_ = type_to_better_[type];
202  }
203 
204  const auto switchScoresSingle = [&counter,this](PeptideIdentification& id){switchScores(id,counter);};
205  cmap.applyFunctionOnPeptideIDs(switchScoresSingle, unassigned_peptides_too);
206  }
207 
208 
210  template <typename IDType>
212  {
213  const String& curr_score_type = id.getScoreType();
214  const std::set<String>& possible_types = type_to_str_[type];
215  if (possible_types.find(curr_score_type) != possible_types.end())
216  {
217  OPENMS_LOG_INFO << "Requested score type already set as main score: " + curr_score_type + "\n";
218  return curr_score_type;
219  }
220  else
221  {
222  if (id.getHits().empty())
223  {
224  OPENMS_LOG_WARN << "Identification entry used to check for alternative score was empty.\n";
225  return "";
226  }
227  const auto& hit = id.getHits()[0];
228  for (const auto& poss_str : possible_types)
229  {
230  if (hit.metaValueExists(poss_str)) return poss_str;
231  else if (hit.metaValueExists(poss_str + "_score")) return poss_str + "_score";
232  }
233  OPENMS_LOG_WARN << "Score of requested type not found in the UserParams of the checked ID object.\n";
234  return "";
235  }
236  }
237 
238  private:
239  void updateMembers_() override;
240 
242  const double tolerance_ = 1e-6;
243 
245  String new_score_, new_score_type_, old_score_;
247  bool higher_better_; // for the new scores, are higher ones better?
248 
250  std::map<ScoreType, std::set<String>> type_to_str_ =
251  {
252  {ScoreType::RAW, {"XTandem", "OMSSA", "SEQUEST:xcorr", "Mascot", "mvh"}},
253  //TODO find out reasonable raw scores for SES that provide E-Values as main score or see below
254  //TODO there is no test for spectraST idXML, so I don't know its score
255  //TODO check if we should combine RAW and RAW_EVAL:
256  // What if a SE does not have an e-value score (spectrast, OMSSA, crux/sequest, myrimatch),
257  // then you need additional if's/try's
258  {ScoreType::RAW_EVAL, {"expect", "SpecEValue", "E-Value", "evalue", "MS:1002053", "MS:1002257"}},
259  {ScoreType::PP, {"Posterior Probability"}},
260  {ScoreType::PEP, {"Posterior Error Probability", "pep", "MS:1001493"}}, // TODO add CV terms
261  {ScoreType::FDR, {"FDR", "fdr", "false discovery rate"}},
262  {ScoreType::QVAL, {"q-value", "qvalue", "MS:1001491", "q-Value", "qval"}}
263  };
264 
266  std::map<ScoreType, bool> type_to_better_ =
267  {
268  {ScoreType::RAW, true}, //TODO this might actually not always be true
269  {ScoreType::RAW_EVAL, false},
270  {ScoreType::PP, true},
271  {ScoreType::PEP, false},
272  {ScoreType::FDR, false},
273  {ScoreType::QVAL, false}
274  };
275  };
276 } // namespace OpenMS
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:470
#define OPENMS_LOG_INFO
Macro to use if information, e.g. a status, should be reported.
Definition: LogStream.h:475
A container for consensus elements.
Definition: ConsensusMap.h:92
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:59
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:388
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
Not all required information provided.
Definition: Exception.h:188
Definition: IDScoreSwitcherAlgorithm.h:50
String findScoreType(IDType &id, IDScoreSwitcherAlgorithm::ScoreType type)
finds a certain score type in an ID and its metavalues if present, otherwise returns empty string
Definition: IDScoreSwitcherAlgorithm.h:211
void switchToGeneralScoreType(ConsensusMap &cmap, ScoreType type, Size &counter, bool unassigned_peptides_too=true)
Definition: IDScoreSwitcherAlgorithm.h:161
void switchScores(IDType &id, Size &counter)
Definition: IDScoreSwitcherAlgorithm.h:81
ScoreType
Definition: IDScoreSwitcherAlgorithm.h:59
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
String new_score_
will be set according to the algorithm parameters
Definition: IDScoreSwitcherAlgorithm.h:245
void switchToGeneralScoreType(std::vector< PeptideIdentification > &id, ScoreType type, Size &counter)
Definition: IDScoreSwitcherAlgorithm.h:120
bool higher_better_
will be set according to the algorithm parameters
Definition: IDScoreSwitcherAlgorithm.h:247
bool isScoreType(const String &score_name, const ScoreType &type)
Checks if the given score_name is of ScoreType type.
Definition: IDScoreSwitcherAlgorithm.h:69
void applyFunctionOnPeptideIDs(T &&f, bool include_unassigned=true)
applies a function on all PeptideIDs or only assigned ones
Definition: MapUtilities.h:68
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
A more convenient string class.
Definition: String.h:60
String chop(Size n) const
Returns a substring where n characters were removed from the end of the string.
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48