OpenMS
ConfidenceScoring.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Hannes Roest, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <cmath> // for "exp"
38 #include <limits> // for "infinity"
39 #include <map>
40 
46 
48 
49 namespace OpenMS
50 {
51 
52  class OPENMS_DLLAPI ConfidenceScoring :
53  public ProgressLogger
54  {
55  public:
56 
/// Constructor.
/// NOTE(review): the effect of @p test_mode_ is defined in the implementation
/// file, which is not visible here -- presumably it makes the random decoy
/// selection reproducible for tests; confirm before relying on it.
58  explicit ConfidenceScoring(bool test_mode_ = false);
59 
60  ~ConfidenceScoring() override {}
61 
62  protected:
63 
/// Binomial GLM: maps an RT difference and an intensity distance to a
/// value in [0, 1] via the logistic function.
struct GLM_
{
  // Coefficients default to zero so that evaluating the model before
  // initializeGlm() was called yields a defined result (0.5) instead of
  // reading indeterminate values.
  double intercept = 0.0; ///< constant term of the linear predictor
  double rt_coef = 0.0;   ///< weight of the squared RT difference
  double int_coef = 0.0;  ///< weight of the intensity distance

  /**
    @brief Evaluates the model.

    @param diff_rt RT difference; enters the linear predictor squared, so its sign is irrelevant
    @param dist_int Intensity distance; enters the linear predictor linearly

    @return 1 / (1 + exp(-lm)), with lm = intercept + rt_coef * diff_rt^2 + int_coef * dist_int
  */
  double operator()(double diff_rt, double dist_int) const
  {
    const double lm = intercept + rt_coef * diff_rt * diff_rt +
                      int_coef * dist_int;
    // qualified call: <cmath> only guarantees the overloads in namespace std
    return 1.0 / (1.0 + std::exp(-lm));
  }
} glm_;
78 
/// Helper for RT normalization (range 0-100)
struct RTNorm_
{
  double min_rt = 0.0; ///< lower end of the RT range (mapped to 0)
  double max_rt = 0.0; ///< upper end of the RT range (mapped to 100)

  /**
    @brief Linearly rescales @p rt so that min_rt maps to 0 and max_rt maps to 100.

    Values outside [min_rt, max_rt] are extrapolated, not clamped.

    @param rt Retention time on the same scale as min_rt / max_rt

    @return The normalized RT; 0 if the range is degenerate
  */
  double operator()(double rt) const
  {
    const double range = max_rt - min_rt;
    // A non-positive (or NaN) range occurs when all RTs are identical or
    // when no RT was found at all (min_rt = +inf, max_rt = -inf). Dividing
    // by it would produce inf/NaN, so every RT is mapped to 0 instead.
    if (!(range > 0.0)) return 0.0;
    return (rt - min_rt) / range * 100;
  }
} rt_norm_;
90 
92 
94 
96 
/// assay (ID) -> transitions (indexes)
/// Filled in scoreMap(): the key is the peptide reference of a transition,
/// the value lists the positions of the matching transitions in the
/// library's transition list.
97  std::map<String, IntList> transition_map_;
98 
100 
103 
105 
108 
111 
114 
119  double feature_rt, DoubleList& feature_intensities,
120  const std::set<String>& transition_ids = std::set<String>());
121 
/// Score a feature. Called by scoreMap() once for every feature of the map;
/// the feature is passed by non-const reference.
123  void scoreFeature_(Feature& feature);
124 
125  public:
126 
127  void initialize(const TargetedExperiment& library, const Size n_decoys, const Size n_transitions, const TransformationDescription& rt_trafo)
128  {
129  library_ = library;
130  n_decoys_ = n_decoys;
131  n_transitions_ = n_transitions;
132  rt_trafo_ = rt_trafo;
133  }
134 
135  void initializeGlm(double intercept, double rt_coef, double int_coef)
136  {
137  glm_.intercept = intercept;
138  glm_.rt_coef = rt_coef;
139  glm_.int_coef = int_coef;
140  }
141 
154  void scoreMap(FeatureMap & features)
155  {
156  // are there enough assays in the library?
157  Size n_assays = library_.getPeptides().size();
158  if (n_assays < 2)
159  {
160  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
161  "There need to be at least 2 assays in the library for ConfidenceScoring.");
162 
163  }
164  if (n_assays - 1 < n_decoys_)
165  {
166  OPENMS_LOG_WARN << "Warning: Parameter 'decoys' (" << n_decoys_
167  << ") is higher than the number of unrelated assays in the "
168  << "library (" << n_assays - 1 << "). "
169  << "Using all unrelated assays as decoys." << std::endl;
170  }
171  if (n_assays - 1 <= n_decoys_) n_decoys_ = 0; // use all available assays
172 
173  decoy_index_.resize(n_assays);
174  for (Size i = 0; i < n_assays; ++i) decoy_index_[i] = boost::numeric_cast<Int>(i);
175 
176  // build mapping between assays and transitions:
177  OPENMS_LOG_DEBUG << "Building transition map..." << std::endl;
178  for (Size i = 0; i < library_.getTransitions().size(); ++i)
179  {
180  const String& ref = library_.getTransitions()[i].getPeptideRef();
181  transition_map_[ref].push_back(boost::numeric_cast<Int>(i));
182  }
183  // find min./max. RT in the library:
184  OPENMS_LOG_DEBUG << "Determining retention time range..." << std::endl;
185  rt_norm_.min_rt = std::numeric_limits<double>::infinity();
186  rt_norm_.max_rt = -std::numeric_limits<double>::infinity();
187  for (std::vector<TargetedExperiment::Peptide>::const_iterator it =
188  library_.getPeptides().begin(); it != library_.getPeptides().end();
189  ++it)
190  {
191  double current_rt = getAssayRT_(*it);
192  if (current_rt == -1.0) continue; // indicates a missing value
193  rt_norm_.min_rt = std::min(rt_norm_.min_rt, current_rt);
194  rt_norm_.max_rt = std::max(rt_norm_.max_rt, current_rt);
195  }
196 
197  // log scoring progress:
198  OPENMS_LOG_DEBUG << "Scoring features..." << std::endl;
199  startProgress(0, features.size(), "scoring features");
200 
201  for (FeatureMap::Iterator feat_it = features.begin();
202  feat_it != features.end(); ++feat_it)
203  {
204  OPENMS_LOG_DEBUG << "Feature " << feat_it - features.begin() + 1
205  << " (ID '" << feat_it->getUniqueId() << "')"<< std::endl;
206  scoreFeature_(*feat_it);
207  setProgress(feat_it - features.begin());
208  }
209  endProgress();
210 
211  }
212 
213  };
214 
215 }
216 
#define OPENMS_LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:480
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:470
Definition: ConfidenceScoring.h:54
double scoreAssay_(const TargetedExperiment::Peptide &assay, double feature_rt, DoubleList &feature_intensities, const std::set< String > &transition_ids=std::set< String >())
void scoreMap(FeatureMap &features)
Score a feature map. The class must be properly initialized (see initialize() and initializeGlm()) before this is called.
Definition: ConfidenceScoring.h:154
void chooseDecoys_()
Randomize the list of decoy indexes.
std::map< String, IntList > transition_map_
assay (ID) -> transitions (indexes)
Definition: ConfidenceScoring.h:97
TargetedExperiment library_
assay library
Definition: ConfidenceScoring.h:91
Math::RandomShuffler shuffler_
random shuffler for container
Definition: ConfidenceScoring.h:104
IntList decoy_index_
indexes of assays to use as decoys
Definition: ConfidenceScoring.h:93
Size n_decoys_
number of decoys to use (per feature/true assay)
Definition: ConfidenceScoring.h:95
double getAssayRT_(const TargetedExperiment::Peptide &assay)
Get the retention time of an assay.
~ConfidenceScoring() override
Definition: ConfidenceScoring.h:60
TransformationDescription rt_trafo_
RT transformation to map measured RTs to assay RTs.
Definition: ConfidenceScoring.h:102
double manhattanDist_(DoubleList x, DoubleList y)
Manhattan distance.
ConfidenceScoring(bool test_mode_=false)
Constructor.
void scoreFeature_(Feature &feature)
Score a feature.
void initializeGlm(double intercept, double rt_coef, double int_coef)
Definition: ConfidenceScoring.h:135
Size n_transitions_
number of transitions to consider
Definition: ConfidenceScoring.h:99
void initialize(const TargetedExperiment &library, const Size n_decoys, const Size n_transitions, const TransformationDescription &rt_trafo)
Definition: ConfidenceScoring.h:127
A method or algorithm argument contains illegal values.
Definition: Exception.h:650
size_t size() const noexcept
Definition: ExposedVector.h:147
iterator begin() noexcept
Definition: ExposedVector.h:123
iterator end() noexcept
Definition: ExposedVector.h:127
A container for features.
Definition: FeatureMap.h:106
iterator Iterator
Definition: FeatureMap.h:113
An LC-MS feature.
Definition: Feature.h:72
Definition: MathFunctions.h:408
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:53
A more convenient string class.
Definition: String.h:60
Represents a peptide (amino acid sequence)
Definition: TargetedExperimentHelper.h:360
A description of a targeted experiment containing precursor and product ions.
Definition: TargetedExperiment.h:65
const std::vector< Peptide > & getPeptides() const
const std::vector< ReactionMonitoringTransition > & getTransitions() const
returns the transition list
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:63
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:55
std::vector< double > DoubleList
Vector of double precision real types.
Definition: ListUtils.h:62
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
Binomial GLM.
Definition: ConfidenceScoring.h:66
double rt_coef
Definition: ConfidenceScoring.h:68
double int_coef
Definition: ConfidenceScoring.h:69
double operator()(double diff_rt, double dist_int) const
Definition: ConfidenceScoring.h:71
double intercept
Definition: ConfidenceScoring.h:67
Helper for RT normalization (range 0-100)
Definition: ConfidenceScoring.h:81
double min_rt
Definition: ConfidenceScoring.h:82
double max_rt
Definition: ConfidenceScoring.h:83
double operator()(double rt) const
Definition: ConfidenceScoring.h:85