OpenMS
SimpleSearchEngineAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Timo Sachsenberg $
33 // --------------------------------------------------------------------------
34 
37 
41 
42 #include <vector>
43 
44 namespace OpenMS
45 {
46 
47 class OPENMS_DLLAPI SimpleSearchEngineAlgorithm :
48  public DefaultParamHandler,
49  public ProgressLogger
50 {
51  public:
53 
55  enum class ExitCodes
56  {
57  EXECUTION_OK,
58  INPUT_FILE_EMPTY,
59  UNEXPECTED_RESULT,
60  UNKNOWN_ERROR,
61  ILLEGAL_PARAMETERS
62  };
63 
65  ExitCodes search(const String& in_mzML,
66  const String& in_db,
67  std::vector<ProteinIdentification>& prot_ids,
68  std::vector<PeptideIdentification>& pep_ids) const;
69  protected:
70  void updateMembers_() override;
71 
74  {
77  double score = 0;
78  std::vector<PeptideHit::PeakAnnotation> fragment_annotations;
79  double prefix_fraction = 0;
80  double suffix_fraction = 0;
81  double mean_error = 0.0;
82 
83  static bool hasBetterScore(const AnnotatedHit_& a, const AnnotatedHit_& b)
84  {
85  if (a.score != b.score) return a.score > b.score;
86  // compare the mod_index first, as it is cheaper than the strncmp() of the sequences
87  // there doesn't have to be a certain ordering (that makes sense), we just need it to be thread-safe
89  return a.sequence < b.sequence;
90  }
91  };
92 
94  static void preprocessSpectra_(PeakMap& exp, double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm);
95 
98  void postProcessHits_(const PeakMap& exp,
99  std::vector<std::vector<SimpleSearchEngineAlgorithm::AnnotatedHit_> >& annotated_hits,
100  std::vector<ProteinIdentification>& protein_ids,
101  std::vector<PeptideIdentification>& peptide_ids,
102  Size top_hits,
103  const ModifiedPeptideGenerator::MapToResidueType& fixed_modifications,
104  const ModifiedPeptideGenerator::MapToResidueType& variable_modifications,
105  Size max_variable_mods_per_peptide,
106  const StringList& modifications_fixed,
107  const StringList& modifications_variable,
108  Int peptide_missed_cleavages,
109  double precursor_mass_tolerance,
110  double fragment_mass_tolerance,
111  const String& precursor_mass_tolerance_unit_ppm,
112  const String& fragment_mass_tolerance_unit_ppm,
113  const Int precursor_min_charge,
114  const Int precursor_max_charge,
115  const String& enzyme,
116  const String& database_name) const;
117 
120 
123 
125 
127 
129 
131 
133 
135 
137 
138  bool decoys_;
139 
141 
145 
147 
149 };
150 
151 } // namespace
152 
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:72
Definition: ModifiedPeptideGenerator.h:57
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:53
Definition: SimpleSearchEngineAlgorithm.h:50
static void preprocessSpectra_(PeakMap &exp, double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm)
filter, deisotope, decharge spectra
ExitCodes search(const String &in_mzML, const String &in_db, std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids) const
search spectra against database
Size peptide_max_size_
Definition: SimpleSearchEngineAlgorithm.h:143
Size precursor_max_charge_
Definition: SimpleSearchEngineAlgorithm.h:122
Size precursor_min_charge_
Definition: SimpleSearchEngineAlgorithm.h:121
void postProcessHits_(const PeakMap &exp, std::vector< std::vector< SimpleSearchEngineAlgorithm::AnnotatedHit_ > > &annotated_hits, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids, Size top_hits, const ModifiedPeptideGenerator::MapToResidueType &fixed_modifications, const ModifiedPeptideGenerator::MapToResidueType &variable_modifications, Size max_variable_mods_per_peptide, const StringList &modifications_fixed, const StringList &modifications_variable, Int peptide_missed_cleavages, double precursor_mass_tolerance, double fragment_mass_tolerance, const String &precursor_mass_tolerance_unit_ppm, const String &fragment_mass_tolerance_unit_ppm, const Int precursor_min_charge, const Int precursor_max_charge, const String &enzyme, const String &database_name) const
filter and annotate search results; most of the parameters are used to properly add meta data to the i...
Size report_top_hits_
Definition: SimpleSearchEngineAlgorithm.h:148
Size modifications_max_variable_mods_per_peptide_
Definition: SimpleSearchEngineAlgorithm.h:134
String precursor_mass_tolerance_unit_
Definition: SimpleSearchEngineAlgorithm.h:119
StringList modifications_fixed_
Definition: SimpleSearchEngineAlgorithm.h:130
String enzyme_
Definition: SimpleSearchEngineAlgorithm.h:136
Size peptide_min_size_
Definition: SimpleSearchEngineAlgorithm.h:142
String fragment_mass_tolerance_unit_
Definition: SimpleSearchEngineAlgorithm.h:128
IntList precursor_isotopes_
Definition: SimpleSearchEngineAlgorithm.h:124
bool decoys_
Definition: SimpleSearchEngineAlgorithm.h:138
StringList annotate_psm_
Definition: SimpleSearchEngineAlgorithm.h:140
double precursor_mass_tolerance_
Definition: SimpleSearchEngineAlgorithm.h:118
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
ExitCodes
Exit codes.
Definition: SimpleSearchEngineAlgorithm.h:56
StringList modifications_variable_
Definition: SimpleSearchEngineAlgorithm.h:132
double fragment_mass_tolerance_
Definition: SimpleSearchEngineAlgorithm.h:126
String peptide_motif_
Definition: SimpleSearchEngineAlgorithm.h:146
Size peptide_missed_cleavages_
Definition: SimpleSearchEngineAlgorithm.h:144
StringView provides a non-owning view on an existing string.
Definition: StringView.h:56
A more convenient string class.
Definition: String.h:60
int Int
Signed integer type.
Definition: Types.h:102
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:55
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
Slimmer structure as storing all scored candidates in PeptideHit objects takes too much space.
Definition: SimpleSearchEngineAlgorithm.h:74
static bool hasBetterScore(const AnnotatedHit_ &a, const AnnotatedHit_ &b)
Definition: SimpleSearchEngineAlgorithm.h:83
double score
main score
Definition: SimpleSearchEngineAlgorithm.h:77
StringView sequence
Definition: SimpleSearchEngineAlgorithm.h:75
std::vector< PeptideHit::PeakAnnotation > fragment_annotations
Definition: SimpleSearchEngineAlgorithm.h:78
SignedSize peptide_mod_index
enumeration index of the non-RNA peptide modification
Definition: SimpleSearchEngineAlgorithm.h:76