OpenMS
IDRipper.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg$
32 // $Authors: Immanuel Luhn, Leon Kuchenbecker$
33 // --------------------------------------------------------------------------
34 #pragma once
35 
39 #include <unordered_map>
40 
41 
42 namespace OpenMS
43 {
// Rips protein/peptide identifications according to their file origin.
// Derives from DefaultParamHandler.
// NOTE(review): this listing is incomplete -- the original line numbers at the
// start of each line skip wherever a line was dropped, and the member index for
// this file names declarations that do not appear below (IDRipper(),
// getProteinIdentification_(), RipFileIdentifier::out_basename, ...).
53  class OPENMS_DLLAPI IDRipper :
54  public DefaultParamHandler
55  {
56 public:
    // Possible input file encodings for the origin as used by different versions of IDMerger.
    // SIZE_OF_ORIGIN_ANNOTATION_FORMAT is the enumerator count; it sizes the name array below.
58  enum OriginAnnotationFormat { FILE_ORIGIN = 0, MAP_INDEX = 1, ID_MERGE_INDEX = 2, UNKNOWN_OAF = 3, SIZE_OF_ORIGIN_ANNOTATION_FORMAT = 4 };
59 
    // String representations for the OriginAnnotationFormat enum.
61  static const std::array<std::string, SIZE_OF_ORIGIN_ANNOTATION_FORMAT> names_of_OriginAnnotationFormat;
62 
    // Represents a set of IdentificationRuns.
64  struct OPENMS_DLLAPI IdentificationRuns
65  {
    // Maps a unique index to every IdentificationRun string representation (getIdentifier()).
67  std::map<String, UInt> index_map;
    // Maps the list of spectra data elements to every IdentificationRun index.
69  std::vector<StringList> spectra_data;
70 
    // Generates a new IdentificationRuns object from a vector of ProteinIdentification objects.
72  IdentificationRuns(const std::vector<ProteinIdentification>& prot_ids);
73  };
74 
    // Identifies an IDRipper output file.
76  struct OPENMS_DLLAPI RipFileIdentifier
77  {
    // Value-initialized index members. Presumably the values returned by
    // getIdentRunIdx() / getFileOriginIdx(), which the member index lists but
    // which are absent from this listing -- TODO confirm.
79  UInt ident_run_idx{};
81  UInt file_origin_idx{};
    // NOTE(review): the members out_basename (original line 83) and
    // origin_fullname (original line 85) are named in the member index but are
    // missing from this listing.
86 
    // Constructs a new RipFileIdentifier object.
    // NOTE(review): the opening line of this constructor declaration is missing
    // here; per the member index its first parameter is
    // `const IDRipper::IdentificationRuns& id_runs`.
89  const PeptideIdentification& pep_id,
90  const std::map<String, UInt>& file_origin_map,
91  const IDRipper::OriginAnnotationFormat origin_annotation_fmt,
92  bool split_ident_runs);
93 
96 
99 
    // Get origin full name.
101  const String & getOriginFullname() const;
102 
    // Get output base name.
104  const String & getOutputBasename() const;
105  };
106 
    // Provides a 'less' operation for RipFileIdentifiers that ignores the
    // out_basename and origin_fullname members (description truncated in the index).
    // NOTE(review): the line naming this struct is missing from the listing;
    // presumably it is RipFileIdentifierIdxComparator, the comparator used by
    // the RipFileMap typedef below -- TODO confirm.
109  {
110  bool operator()(const RipFileIdentifier& left, const RipFileIdentifier& right) const;
111  };
112 
    // Represents the content of an IDRipper output file.
114  struct OPENMS_DLLAPI RipFileContent
115  {
    // Protein identifications.
117  std::vector<ProteinIdentification> prot_idents;
    // Peptide identifications.
119  std::vector<PeptideIdentification> pep_idents;
    // Constructs a new RipFileContent object; both vectors are copied into the members.
121  RipFileContent(const std::vector<ProteinIdentification>& prot_idents, const std::vector<PeptideIdentification>& pep_idents)
122  : prot_idents(prot_idents), pep_idents(pep_idents) {}
    // Get protein identifications (declared non-const).
124  const std::vector<ProteinIdentification> & getProteinIdentifications();
    // Get peptide identifications (declared non-const).
126  const std::vector<PeptideIdentification> & getPeptideIdentifications();
127  };
128 
    // Represents the result of an IDRipper process: a map assigning file content
    // to output file identifiers, ordered by RipFileIdentifierIdxComparator.
130  typedef std::map<RipFileIdentifier, RipFileContent, RipFileIdentifierIdxComparator> RipFileMap;
131 
    // NOTE(review): the member index lists a default constructor IDRipper();
    // its declaration does not appear in this listing.
134 
    // Destructor.
136  ~IDRipper() override;
137 
    // Ripping protein/peptide identification according to their file origin.
    // Takes the result map `ripped` by non-const reference; `proteins` and
    // `peptides` are also passed by non-const reference.
150  void rip(
151  RipFileMap& ripped,
152  std::vector<ProteinIdentification>& proteins,
153  std::vector<PeptideIdentification>& peptides,
154  bool numeric_filenames,
155  bool split_ident_runs);
156 
    // Same operation with the result passed as two vectors (`rfis`, `rfcs`)
    // instead of a RipFileMap.
169  // Autowrap compatible wrapper for rip(RipFileMap,...)
170  void rip(
171  std::vector<RipFileIdentifier> & rfis,
172  std::vector<RipFileContent> & rfcs,
173  std::vector<ProteinIdentification>& proteins,
174  std::vector<PeptideIdentification>& peptides,
175  bool numeric_filenames,
176  bool split_ident_runs);
177 
178 private:
    // Copy constructor; declared private.
179  // Not implemented
181  IDRipper(const IDRipper & rhs);
182 
    // Assignment; declared private.
183  // Not implemented
185  IDRipper & operator=(const IDRipper & rhs);
186 
    // Helper function, detects file origin annotation standard from collections of protein and peptide hits.
188  OriginAnnotationFormat detectOriginAnnotationFormat_(std::map<String, UInt> & file_origin_map, const std::vector<PeptideIdentification> & peptide_idents);
    // Helper function, extracts all protein hits that match the protein accession; hits are written to `result`.
190  void getProteinHits_(std::vector<ProteinHit> & result, const std::unordered_map<String, const ProteinHit*> & acc2protein_hits, const std::set<String> & protein_accessions);
    // Helper function, returns the string representation of the peptide hit accession (as a set of String).
192  std::set<String> getProteinAccessions_(const std::vector<PeptideHit> & peptide_hits);
    // NOTE(review): the member index also lists getProteinIdentification_(), which is absent from this listing.
    // Helper function, register a potential output file basename to detect duplicate output basenames.
196  bool registerBasename_(std::map<String, std::pair<UInt, UInt> >& basename_to_numeric, const IDRipper::RipFileIdentifier& rfi);
    // Helper function, sets the value of mode to new_value and returns true if
    // the old value was identical ... (description truncated in the index).
198  bool setOriginAnnotationMode_(short& mode, short const new_value);
199  };
200 
201 } // namespace OpenMS
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
Ripping protein/peptide identification according to their file origin.
Definition: IDRipper.h:55
bool registerBasename_(std::map< String, std::pair< UInt, UInt > > &basename_to_numeric, const IDRipper::RipFileIdentifier &rfi)
helper function, register a potential output file basename to detect duplicate output basenames
void rip(RipFileMap &ripped, std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, bool numeric_filenames, bool split_ident_runs)
Ripping protein/peptide identification according to their file origin.
IDRipper()
Default constructor.
~IDRipper() override
Destructor.
static const std::array< std::string, SIZE_OF_ORIGIN_ANNOTATION_FORMAT > names_of_OriginAnnotationFormat
String representations for the OriginAnnotationFormat enum.
Definition: IDRipper.h:61
void getProteinHits_(std::vector< ProteinHit > &result, const std::unordered_map< String, const ProteinHit * > &acc2protein_hits, const std::set< String > &protein_accessions)
helper function, extracts all protein hits that match the protein accession
OriginAnnotationFormat detectOriginAnnotationFormat_(std::map< String, UInt > &file_origin_map, const std::vector< PeptideIdentification > &peptide_idents)
helper function, detects file origin annotation standard from collections of protein and peptide hits
std::map< RipFileIdentifier, RipFileContent, RipFileIdentifierIdxComparator > RipFileMap
Represents the result of an IDRipper process, a map assigning file content to output file identifiers...
Definition: IDRipper.h:130
int getProteinIdentification_(const PeptideIdentification &pep_ident, const IdentificationRuns &id_runs)
helper function, returns the index of the protein identification for the given peptide identification...
OriginAnnotationFormat
Possible input file encodings for the origin as used by different versions of IDMerger.
Definition: IDRipper.h:58
IDRipper(const IDRipper &rhs)
Copy constructor.
bool setOriginAnnotationMode_(short &mode, short const new_value)
helper function, sets the value of mode to new_value and returns true if the old value was identical ...
void rip(std::vector< RipFileIdentifier > &rfis, std::vector< RipFileContent > &rfcs, std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, bool numeric_filenames, bool split_ident_runs)
Ripping protein/peptide identification according to their file origin.
IDRipper & operator=(const IDRipper &rhs)
Assignment.
std::set< String > getProteinAccessions_(const std::vector< PeptideHit > &peptide_hits)
helper function, returns the string representation of the peptide hit accession
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
A more convenient string class.
Definition: String.h:60
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
const std::string ID_MERGE_INDEX
Definition: Constants.h:323
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
Represents a set of IdentificationRuns.
Definition: IDRipper.h:65
IdentificationRuns(const std::vector< ProteinIdentification > &prot_ids)
Generates a new IdentificationRuns object from a vector of ProteinIdentification objects.
std::vector< StringList > spectra_data
Maps the list of spectra data elements to every IdentificationRun index.
Definition: IDRipper.h:69
std::map< String, UInt > index_map
Maps a unique index to every IdentificationRun string representation (getIdentifier()).
Definition: IDRipper.h:67
Represents the content of an IDRipper output file.
Definition: IDRipper.h:115
const std::vector< ProteinIdentification > & getProteinIdentifications()
Get protein identifications.
std::vector< PeptideIdentification > pep_idents
Peptide identifications.
Definition: IDRipper.h:119
RipFileContent(const std::vector< ProteinIdentification > &prot_idents, const std::vector< PeptideIdentification > &pep_idents)
Constructs a new RipFileContent object.
Definition: IDRipper.h:121
const std::vector< PeptideIdentification > & getPeptideIdentifications()
Get peptide identifications.
std::vector< ProteinIdentification > prot_idents
Protein identifications.
Definition: IDRipper.h:117
Provides a 'less' operation for RipFileIdentifiers that ignores the out_basename and origin_fullname ...
Definition: IDRipper.h:109
bool operator()(const RipFileIdentifier &left, const RipFileIdentifier &right) const
Identifies an IDRipper output file.
Definition: IDRipper.h:77
RipFileIdentifier(const IDRipper::IdentificationRuns &id_runs, const PeptideIdentification &pep_id, const std::map< String, UInt > &file_origin_map, const IDRipper::OriginAnnotationFormat origin_annotation_fmt, bool split_ident_runs)
Constructs a new RipFileIdentifier object.
String out_basename
The output basename derived from the file_origin / spectra_data element.
Definition: IDRipper.h:83
const String & getOutputBasename() const
Get output base name.
const String & getOriginFullname() const
Get origin full name.
UInt getFileOriginIdx() const
Get file origin index.
String origin_fullname
The full length origin read from the file_origin / spectra_data element.
Definition: IDRipper.h:85
UInt getIdentRunIdx() const
Get identification run index.