OpenMS
IDFilter.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Mathias Walzer $
32 // $Authors: Nico Pfeifer, Mathias Walzer, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
47 #include <OpenMS/config.h>
48 #include <algorithm>
49 #include <climits>
50 #include <functional>
51 #include <map>
52 #include <set>
53 #include <unordered_set>
54 #include <vector>
55 
56 namespace OpenMS
57 {
78  class OPENMS_DLLAPI IDFilter
79  {
80  public:
82  IDFilter() = default;
83 
85  virtual ~IDFilter() = default;
86 
88  typedef std::map<Int, PeptideHit*> ChargeToPepHitP;
89  typedef std::unordered_map<std::string, ChargeToPepHitP> SequenceToChargeToPepHitP;
90  typedef std::map<std::string, SequenceToChargeToPepHitP> RunToSequenceToChargeToPepHitP;
91 
98 
100  template<class HitType>
101  struct HasGoodScore {
102  typedef HitType argument_type; // for use as a predicate
103 
104  double score;
106 
107  HasGoodScore(double score_, bool higher_score_better_) : score(score_), higher_score_better(higher_score_better_)
108  {
109  }
110 
111  bool operator()(const HitType& hit) const
112  {
113  if (higher_score_better)
114  {
115  return hit.getScore() >= score;
116  }
117  return hit.getScore() <= score;
118  }
119  };
120 
126  template<class HitType>
127  struct HasMaxRank {
128  typedef HitType argument_type; // for use as a predicate
129 
131 
132  HasMaxRank(Size rank_) : rank(rank_)
133  {
134  if (rank_ == 0)
135  {
136  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The cut-off value for rank filtering must not be zero!");
137  }
138  }
139 
140  bool operator()(const HitType& hit) const
141  {
142  Size hit_rank = hit.getRank();
143  if (hit_rank == 0)
144  {
145  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "No rank assigned to peptide or protein hit");
146  }
147  return hit_rank <= rank;
148  }
149  };
150 
156  template<class HitType>
157  struct HasMetaValue {
158  typedef HitType argument_type; // for use as a predicate
159 
162 
163  HasMetaValue(const String& key_, const DataValue& value_) : key(key_), value(value_)
164  {
165  }
166 
167  bool operator()(const HitType& hit) const
168  {
169  DataValue found = hit.getMetaValue(key);
170  if (found.isEmpty())
171  return false; // meta value "key" not set
172  if (value.isEmpty())
173  return true; // "key" is set, value doesn't matter
174  return found == value;
175  }
176  };
177 
179  template<class HitType>
181  typedef HitType argument_type; // for use as a predicate
182 
184  double value;
185 
186  HasMaxMetaValue(const String& key_, const double& value_) : key(key_), value(value_)
187  {
188  }
189 
190  bool operator()(const HitType& hit) const
191  {
192  DataValue found = hit.getMetaValue(key);
193  if (found.isEmpty())
194  return false; // meta value "key" not set
195  return double(found) <= value;
196  }
197  };
198 
200  template<class HitType>
202  typedef HitType argument_type; // for use as a predicate
203 
204  struct HasMetaValue<HitType> target_decoy, is_decoy;
205 
206  HasDecoyAnnotation() : target_decoy("target_decoy", "decoy"), is_decoy("isDecoy", "true")
207  {
208  }
209 
210  bool operator()(const HitType& hit) const
211  {
212  // @TODO: this could be done slightly more efficiently by returning
213  // false if the "target_decoy" meta value is "target" or "target+decoy",
214  // without checking for an "isDecoy" meta value in that case
215  return target_decoy(hit) || is_decoy(hit);
216  }
217  };
218 
224  template<class HitType>
226  typedef HitType argument_type; // for use as a predicate
227 
228  const std::unordered_set<String>& accessions;
229 
230  HasMatchingAccessionUnordered(const std::unordered_set<String>& accessions_) : accessions(accessions_)
231  {
232  }
233 
234  bool operator()(const PeptideHit& hit) const
235  {
236  for (const auto& it : hit.extractProteinAccessionsSet())
237  {
238  if (accessions.count(it) > 0)
239  return true;
240  }
241  return false;
242  }
243 
244  bool operator()(const ProteinHit& hit) const
245  {
246  return (accessions.count(hit.getAccession()) > 0);
247  }
248 
249  bool operator()(const PeptideEvidence& evidence) const
250  {
251  return (accessions.count(evidence.getProteinAccession()) > 0);
252  }
253  };
254 
260  template<class HitType>
262  typedef HitType argument_type; // for use as a predicate
263 
264  const std::set<String>& accessions;
265 
266  HasMatchingAccession(const std::set<String>& accessions_) : accessions(accessions_)
267  {
268  }
269 
270  bool operator()(const PeptideHit& hit) const
271  {
272  for (const auto& it : hit.extractProteinAccessionsSet())
273  {
274  if (accessions.count(it) > 0)
275  return true;
276  }
277  return false;
278  }
279 
280  bool operator()(const ProteinHit& hit) const
281  {
282  return (accessions.count(hit.getAccession()) > 0);
283  }
284 
285  bool operator()(const PeptideEvidence& evidence) const
286  {
287  return (accessions.count(evidence.getProteinAccession()) > 0);
288  }
289  };
290 
296  template<class HitType, class Entry>
298  typedef HitType argument_type; // for use as a predicate
299  typedef std::map<String, Entry*> ItemMap; // Store pointers to avoid copying data
301 
302  GetMatchingItems(std::vector<Entry>& records)
303  {
304  for (typename std::vector<Entry>::iterator rec_it = records.begin(); rec_it != records.end(); ++rec_it)
305  {
306  items[getKey(*rec_it)] = &(*rec_it);
307  }
308  }
309 
311  {
312  }
313 
314  const String& getKey(const FASTAFile::FASTAEntry& entry) const
315  {
316  return entry.identifier;
317  }
318 
319  bool exists(const HitType& hit) const
320  {
321  return items.count(getHitKey(hit)) > 0;
322  }
323 
324  const String& getHitKey(const PeptideEvidence& p) const
325  {
326  return p.getProteinAccession();
327  }
328 
329  const Entry& getValue(const PeptideEvidence& evidence) const
330  {
331  if (!exists(evidence))
332  {
333  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Accession: '" + getHitKey(evidence) + "'. peptide evidence accession not in data");
334  }
335  return *(items.find(getHitKey(evidence))->second);
336  }
337  };
338 
340 
341 
348 
350  struct HasMinPeptideLength;
351 
353  struct HasMinCharge;
354 
356  struct HasLowMZError;
357 
363  struct HasMatchingModification;
364 
370  struct HasMatchingSequence;
371 
373  struct HasNoEvidence;
374 
375 
382  {
383  private:
387 
388  public:
390  PeptideDigestionFilter(EnzymaticDigestion& digestion, Int min, Int max) : digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
391  {
392  }
393 
394  static inline Int disabledValue()
395  {
396  return -1;
397  }
398 
401  bool operator()(PeptideHit& p) const
402  {
403  const auto& fun = [&](const Int missed_cleavages) {
404  bool max_filter = max_cleavages_ != disabledValue() ? missed_cleavages > max_cleavages_ : false;
405  bool min_filter = min_cleavages_ != disabledValue() ? missed_cleavages < min_cleavages_ : false;
406  return max_filter || min_filter;
407  };
408  return digestion_.filterByMissedCleavages(p.getSequence().toUnmodifiedString(), fun);
409  }
410 
411  void filterPeptideSequences(std::vector<PeptideHit>& hits)
412  {
413  hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)), hits.end());
414  }
415  };
416 
417 
425 
426  // Build an accession index to avoid the linear search cost
431 
432  DigestionFilter(std::vector<FASTAFile::FASTAEntry>& entries, ProteaseDigestion& digestion, bool ignore_missed_cleavages, bool methionine_cleavage) :
433  accession_resolver_(entries), digestion_(digestion), ignore_missed_cleavages_(ignore_missed_cleavages), methionine_cleavage_(methionine_cleavage)
434  {
435  }
436 
437  bool operator()(const PeptideEvidence& evidence) const
438  {
439  if (!evidence.hasValidLimits())
440  {
441  OPENMS_LOG_WARN << "Invalid limits! Peptide '" << evidence.getProteinAccession() << "' not filtered" << std::endl;
442  return true;
443  }
444 
445  if (accession_resolver_.exists(evidence))
446  {
447  return digestion_.isValidProduct(AASequence::fromString(accession_resolver_.getValue(evidence).sequence), evidence.getStart(), evidence.getEnd() - evidence.getStart(),
448  ignore_missed_cleavages_, methionine_cleavage_);
449  }
450  else
451  {
452  if (evidence.getProteinAccession().empty())
453  {
454  OPENMS_LOG_WARN << "Peptide accession not available! Skipping Evidence." << std::endl;
455  }
456  else
457  {
458  OPENMS_LOG_WARN << "Peptide accession '" << evidence.getProteinAccession() << "' not found in fasta file!" << std::endl;
459  }
460  return true;
461  }
462  }
463 
464  void filterPeptideEvidences(std::vector<PeptideIdentification>& peptides)
465  {
466  IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*this, peptides);
467  }
468  };
469 
471 
472 
475 
/// Predicate: is the list of hits of an identification empty?
template<class IdentificationType>
struct HasNoHits
{
  using argument_type = IdentificationType; // for use as a predicate

  /// Returns true if `id.getHits()` is empty.
  bool operator()(const IdentificationType& id) const
  {
    const auto& hits = id.getHits();
    return hits.empty();
  }
};
486 
488 
489 
492 
494  struct HasRTInRange;
495 
497  struct HasMZInRange;
498 
500 
501 
508 
/// Erases from @p items every element for which @p pred returns true.
template<class Container, class Predicate>
static void removeMatchingItems(Container& items, const Predicate& pred)
{
  const auto first_removed = std::remove_if(items.begin(), items.end(), pred);
  items.erase(first_removed, items.end());
}
515 
/// Erases from @p items every element for which @p pred returns false.
template<class Container, class Predicate>
static void keepMatchingItems(Container& items, const Predicate& pred)
{
  const auto first_removed = std::remove_if(items.begin(), items.end(), std::not_fn(pred));
  items.erase(first_removed, items.end());
}
522 
/// Moves every element of @p items for which @p pred returns true to the end of
/// @p target and erases it from @p items. Uses std::partition, so the relative
/// order of elements is not guaranteed to be preserved.
template<class Container, class Predicate>
static void moveMatchingItems(Container& items, const Predicate& pred, Container& target)
{
  // non-matching elements go to the front, matching ones to the back
  const auto first_match = std::partition(items.begin(), items.end(), std::not_fn(pred));
  const auto items_end = items.end();
  std::move(first_match, items_end, std::back_inserter(target));
  items.erase(first_match, items_end);
}
531 
/// For every element of @p items, removes the hits (`getHits()`) that match @p pred.
template<class IDContainer, class Predicate>
static void removeMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  std::for_each(items.begin(), items.end(), [&pred](auto& id) { removeMatchingItems(id.getHits(), pred); });
}
541 
/// For every element of @p items, keeps only the hits (`getHits()`) that match @p pred.
template<class IDContainer, class Predicate>
static void keepMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  std::for_each(items.begin(), items.end(), [&pred](auto& id) { keepMatchingItems(id.getHits(), pred); });
}
551 
/// Keeps only the peptide hits matching @p pred, both in the peptide
/// identifications of every element of @p prot_and_pep_ids and in its
/// unassigned peptide identifications.
/// The predicate is taken by const reference (like the helpers it is forwarded
/// to), so temporaries can be passed as well as lvalues.
template<class MapType, class Predicate>
static void keepMatchingPeptideHits(MapType& prot_and_pep_ids, const Predicate& pred)
{
  for (auto& feat : prot_and_pep_ids)
  {
    keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
  }
  keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
561 
/// Removes the peptide hits matching @p pred, both from the peptide
/// identifications of every element of @p prot_and_pep_ids and from its
/// unassigned peptide identifications.
/// The predicate is taken by const reference (like the helpers it is forwarded
/// to), so temporaries can be passed as well as lvalues.
template<class MapType, class Predicate>
static void removeMatchingPeptideHits(MapType& prot_and_pep_ids, const Predicate& pred)
{
  for (auto& feat : prot_and_pep_ids)
  {
    removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
  }
  removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
571 
/// Removes the peptide identifications matching @p pred, both from every
/// element of @p prot_and_pep_ids and from its unassigned peptide identifications.
/// The predicate is taken by const reference (like removeMatchingItems(), to
/// which it is forwarded), so temporaries can be passed as well as lvalues.
template<class MapType, class Predicate>
static void removeMatchingPeptideIdentifications(MapType& prot_and_pep_ids, const Predicate& pred)
{
  for (auto& feat : prot_and_pep_ids)
  {
    removeMatchingItems(feat.getPeptideIdentifications(), pred);
  }
  removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
581 
583 
584 
587 
589  template<class IdentificationType>
590  static Size countHits(const std::vector<IdentificationType>& ids)
591  {
592  Size counter = 0;
593  for (typename std::vector<IdentificationType>::const_iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
594  {
595  counter += id_it->getHits().size();
596  }
597  return counter;
598  }
599 
613  template<class IdentificationType>
614  static bool getBestHit(const std::vector<IdentificationType>& identifications, bool assume_sorted, typename IdentificationType::HitType& best_hit)
615  {
616  if (identifications.empty())
617  return false;
618 
619  typename std::vector<IdentificationType>::const_iterator best_id_it = identifications.end();
620  typename std::vector<typename IdentificationType::HitType>::const_iterator best_hit_it;
621 
622  for (typename std::vector<IdentificationType>::const_iterator id_it = identifications.begin(); id_it != identifications.end(); ++id_it)
623  {
624  if (id_it->getHits().empty())
625  continue;
626 
627  if (best_id_it == identifications.end()) // no previous "best" hit
628  {
629  best_id_it = id_it;
630  best_hit_it = id_it->getHits().begin();
631  }
632  else if (best_id_it->getScoreType() != id_it->getScoreType())
633  {
634  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Can't compare scores of different types", best_id_it->getScoreType() + "/" + id_it->getScoreType());
635  }
636 
637  bool higher_better = best_id_it->isHigherScoreBetter();
638  for (typename std::vector<typename IdentificationType::HitType>::const_iterator hit_it = id_it->getHits().begin(); hit_it != id_it->getHits().end(); ++hit_it)
639  {
640  if ((higher_better && (hit_it->getScore() > best_hit_it->getScore())) || (!higher_better && (hit_it->getScore() < best_hit_it->getScore())))
641  {
642  best_hit_it = hit_it;
643  }
644  if (assume_sorted)
645  break; // only consider the first hit
646  }
647  }
648 
649  if (best_id_it == identifications.end())
650  {
651  return false; // no hits in any IDs
652  }
653 
654  best_hit = *best_hit_it;
655  return true;
656  }
657 
665  static void extractPeptideSequences(const std::vector<PeptideIdentification>& peptides, std::set<String>& sequences, bool ignore_mods = false);
666 
672  static std::map<String, std::vector<ProteinHit>> extractUnassignedProteins(ConsensusMap& cmap);
673 
679  template<class EvidenceFilter>
680  static void FilterPeptideEvidences(EvidenceFilter& filter, std::vector<PeptideIdentification>& peptides)
681  {
682  for (std::vector<PeptideIdentification>::iterator pep_it = peptides.begin(); pep_it != peptides.end(); ++pep_it)
683  {
684  for (std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it)
685  {
686  std::vector<PeptideEvidence> evidences;
687  remove_copy_if(hit_it->getPeptideEvidences().begin(), hit_it->getPeptideEvidences().end(), back_inserter(evidences), std::not_fn(filter));
688  hit_it->setPeptideEvidences(evidences);
689  }
690  }
691  }
692 
694 
695 
698 
/// Calls `assignRanks()` on every identification in @p ids.
template<class IdentificationType>
static void updateHitRanks(std::vector<IdentificationType>& ids)
{
  for (IdentificationType& id : ids)
  {
    id.assignRanks();
  }
}
708 
711  static void removeUnreferencedProteins(ConsensusMap& cmap, bool include_unassigned);
712 
714  static void removeUnreferencedProteins(std::vector<ProteinIdentification>& proteins, const std::vector<PeptideIdentification>& peptides);
716  static void removeUnreferencedProteins(ProteinIdentification& proteins, const std::vector<PeptideIdentification>& peptides);
717 
725  static void updateProteinReferences(std::vector<PeptideIdentification>& peptides, const std::vector<ProteinIdentification>& proteins, bool remove_peptides_without_reference = false);
726 
734  static void updateProteinReferences(ConsensusMap& cmap, bool remove_peptides_without_reference = false);
735 
743  static void updateProteinReferences(ConsensusMap& cmap, const ProteinIdentification& ref_run, bool remove_peptides_without_reference = false);
744 
753  static bool updateProteinGroups(std::vector<ProteinIdentification::ProteinGroup>& groups, const std::vector<ProteinHit>& hits);
754 
761  static void removeUngroupedProteins(const std::vector<ProteinIdentification::ProteinGroup>& groups, std::vector<ProteinHit>& hits);
763 
764 
767 
769  template<class IdentificationType>
770  static void removeEmptyIdentifications(std::vector<IdentificationType>& ids)
771  {
772  struct HasNoHits<IdentificationType> empty_filter;
773  removeMatchingItems(ids, empty_filter);
774  }
775 
781  template<class IdentificationType>
782  static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score)
783  {
784  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
785  {
786  struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id_it->isHigherScoreBetter());
787  keepMatchingItems(id_it->getHits(), score_filter);
788  }
789  }
790 
797  static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps, double threshold_score, bool higher_better);
798 
804  template<class IdentificationType>
805  static void filterHitsByScore(IdentificationType& id, double threshold_score)
806  {
807  struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
808  keepMatchingItems(id.getHits(), score_filter);
809  }
810 
816  template<class IdentificationType>
817  static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
818  {
819  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
820  {
821  id_it->sort();
822  if (n < id_it->getHits().size())
823  id_it->getHits().resize(n);
824  }
825  }
826 
841  template<class IdentificationType>
842  static void filterHitsByRank(std::vector<IdentificationType>& ids, Size min_rank, Size max_rank)
843  {
844  updateHitRanks(ids);
845  if (min_rank > 1)
846  {
847  struct HasMaxRank<typename IdentificationType::HitType> rank_filter(min_rank - 1);
848  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
849  {
850  removeMatchingItems(id_it->getHits(), rank_filter);
851  }
852  }
853  if (max_rank >= min_rank)
854  {
855  struct HasMaxRank<typename IdentificationType::HitType> rank_filter(max_rank);
856  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
857  {
858  keepMatchingItems(id_it->getHits(), rank_filter);
859  }
860  }
861  }
862 
870  template<class IdentificationType>
871  static void removeDecoyHits(std::vector<IdentificationType>& ids)
872  {
873  struct HasDecoyAnnotation<typename IdentificationType::HitType> decoy_filter;
874  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
875  {
876  removeMatchingItems(id_it->getHits(), decoy_filter);
877  }
878  }
879 
887  template<class IdentificationType>
888  static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String> accessions)
889  {
890  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
891  for (auto& id_it : ids)
892  {
893  removeMatchingItems(id_it.getHits(), acc_filter);
894  }
895  }
896 
904  template<class IdentificationType>
905  static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String>& accessions)
906  {
907  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
908  for (auto& id_it : ids)
909  {
910  keepMatchingItems(id_it.getHits(), acc_filter);
911  }
912  }
913 
915 
916 
919 
926  static void keepBestPeptideHits(std::vector<PeptideIdentification>& peptides, bool strict = false);
927 
936  static void filterPeptidesByLength(std::vector<PeptideIdentification>& peptides, Size min_length, Size max_length = UINT_MAX);
937 
946  static void filterPeptidesByCharge(std::vector<PeptideIdentification>& peptides, Int min_charge, Int max_charge);
947 
949  static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides, double min_rt, double max_rt);
950 
952  static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides, double min_mz, double max_mz);
953 
965  static void filterPeptidesByMZError(std::vector<PeptideIdentification>& peptides, double mass_error, bool unit_ppm);
966 
967 
974  template<class Filter>
975  static void filterPeptideEvidences(Filter& filter, std::vector<PeptideIdentification>& peptides);
976 
988  static void filterPeptidesByRTPredictPValue(std::vector<PeptideIdentification>& peptides, const String& metavalue_key, double threshold = 0.05);
989 
991  static void removePeptidesWithMatchingModifications(std::vector<PeptideIdentification>& peptides, const std::set<String>& modifications);
992 
993  static void removePeptidesWithMatchingRegEx(std::vector<PeptideIdentification>& peptides, const String& regex);
994 
996  static void keepPeptidesWithMatchingModifications(std::vector<PeptideIdentification>& peptides, const std::set<String>& modifications);
997 
1005  static void removePeptidesWithMatchingSequences(std::vector<PeptideIdentification>& peptides, const std::vector<PeptideIdentification>& bad_peptides, bool ignore_mods = false);
1006 
1014  static void keepPeptidesWithMatchingSequences(std::vector<PeptideIdentification>& peptides, const std::vector<PeptideIdentification>& good_peptides, bool ignore_mods = false);
1015 
1017  static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>& peptides);
1018 
1025  static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>& peptides, bool seq_only = false);
1026 
1028 
1029 
1032 
1034  static void filterHitsByScore(PeakMap& experiment, double peptide_threshold_score, double protein_threshold_score)
1035  {
1036  // filter protein hits:
1037  filterHitsByScore(experiment.getProteinIdentifications(), protein_threshold_score);
1038  // don't remove empty protein IDs - they contain search metadata and may
1039  // be referenced by peptide IDs (via run ID)
1040 
1041  // filter peptide hits:
1042  for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it)
1043  {
1044  filterHitsByScore(exp_it->getPeptideIdentifications(), peptide_threshold_score);
1045  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1046  // TODO super-duper inefficient.
1047  updateProteinReferences(exp_it->getPeptideIdentifications(), experiment.getProteinIdentifications());
1048  }
1049  // @TODO: remove proteins that aren't referenced by peptides any more?
1050  }
1051 
1053  static void keepNBestHits(PeakMap& experiment, Size n)
1054  {
1055  // don't filter the protein hits by "N best" here - filter the peptides
1056  // and update the protein hits!
1057  std::vector<PeptideIdentification> all_peptides; // IDs from all spectra
1058 
1059  // filter peptide hits:
1060  for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it)
1061  {
1062  std::vector<PeptideIdentification>& peptides = exp_it->getPeptideIdentifications();
1063  keepNBestHits(peptides, n);
1064  removeEmptyIdentifications(peptides);
1065  updateProteinReferences(peptides, experiment.getProteinIdentifications());
1066  all_peptides.insert(all_peptides.end(), peptides.begin(), peptides.end());
1067  }
1068  // update protein hits:
1069  removeUnreferencedProteins(experiment.getProteinIdentifications(), all_peptides);
1070  }
1071 
1074  static void keepNBestSpectra(std::vector<PeptideIdentification>& peptides, Size n);
1075 
1077  template<class MapType>
1078  static void keepNBestPeptideHits(MapType& map, Size n)
1079  {
1080  // The rank predicate needs annotated ranks, not sure if they are always updated. Use the following instead,
1081  // which sorts Hits first.
1082  for (auto& feat : map)
1083  {
1084  keepNBestHits(feat.getPeptideIdentifications(), n);
1085  }
1086  keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1087  }
1088 
1089  template<class MapType>
1090  static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1091  {
1092  const auto pred = HasNoHits<PeptideIdentification>();
1093  removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1094  }
1095 
1097  static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1098  {
1099  annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1100  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1101  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1102  }
1103 
1104  static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1105  {
1106  annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1107  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1108  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1109  }
1110 
1111  // TODO allow skipping unassigned?
1112  template<class MapType>
1113  static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1114  {
1115  const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1116 
1117  RunToSequenceToChargeToPepHitP best_peps_per_run;
1118  for (const auto& idrun : prot_ids)
1119  {
1120  best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1121  }
1122 
1123  for (auto& feat : prot_and_pep_ids)
1124  {
1125  annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1126  }
1127 
1128  annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1129  }
1130 
1131  template<class MapType>
1132  static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1133  {
1134  annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1135  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1136  keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1137  }
1138 
1141  static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges,
1142  Size nr_best_spectrum)
1143  {
1144  RunToSequenceToChargeToPepHitP best_peps_per_run;
1145  for (const auto& id : prot_ids)
1146  {
1147  best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1148  }
1149  annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1150  }
1151 
1155  static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges,
1156  Size nr_best_spectrum)
1157  {
1158  for (auto& pep : pep_ids)
1159  {
1160  SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1161  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1162  }
1163  }
1164 
1168  static void annotateBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1169  {
1170  SequenceToChargeToPepHitP best_pep;
1171  for (auto& pep : pep_ids)
1172  {
1173  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1174  }
1175  }
1176 
1181  static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1182  {
1183  bool higher_score_better = pep.isHigherScoreBetter();
1184  // make sure that first = best hit
1185  pep.sort();
1186 
1187  auto pepIt = pep.getHits().begin();
1188  auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1189  for (; pepIt != pepItEnd; ++pepIt)
1190  {
1191  PeptideHit& hit = *pepIt;
1192 
1193  String lookup_seq;
1194  if (ignore_mods)
1195  {
1196  lookup_seq = hit.getSequence().toUnmodifiedString();
1197  }
1198  else
1199  {
1200  lookup_seq = hit.getSequence().toString();
1201  }
1202 
1203  int lookup_charge = 0;
1204  if (!ignore_charges)
1205  {
1206  lookup_charge = hit.getCharge();
1207  }
1208 
1209  // try to insert
1210  auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1211  auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1212 
1213  PeptideHit*& p = it_inserted_chg.first->second; // now this gets either the old one if already present, or this
1214  if (!it_inserted_chg.second) // was already present -> possibly update
1215  {
1216  if ((higher_score_better && (hit.getScore() > p->getScore())) || (!higher_score_better && (hit.getScore() < p->getScore())))
1217  {
1218  p->setMetaValue("best_per_peptide", 0);
1219  hit.setMetaValue("best_per_peptide", 1);
1220  p = &hit;
1221  }
1222  else // note that this was def. not the best
1223  {
1224  // TODO if it is only about filtering, we can omit writing this metavalue (absence = false)
1225  hit.setMetaValue("best_per_peptide", 0);
1226  }
1227  }
1228  else // newly inserted -> first for that sequence (and optionally charge)
1229  {
1230  hit.setMetaValue("best_per_peptide", 1);
1231  }
1232  }
1233  }
1234 
1236  static void keepHitsMatchingProteins(PeakMap& experiment, const std::vector<FASTAFile::FASTAEntry>& proteins)
1237  {
1238  std::set<String> accessions;
1239  for (std::vector<FASTAFile::FASTAEntry>::const_iterator it = proteins.begin(); it != proteins.end(); ++it)
1240  {
1241  accessions.insert(it->identifier);
1242  }
1243 
1244  // filter protein hits:
1245  keepHitsMatchingProteins(experiment.getProteinIdentifications(), accessions);
1246  updateHitRanks(experiment.getProteinIdentifications());
1247 
1248  // filter peptide hits:
1249  for (PeakMap::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it)
1250  {
1251  if (exp_it->getMSLevel() == 2)
1252  {
1253  keepHitsMatchingProteins(exp_it->getPeptideIdentifications(), accessions);
1254  removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1255  updateHitRanks(exp_it->getPeptideIdentifications());
1256  }
1257  }
1258  }
1259 
1261 
1262 
1265 
1276 
1289 
1295  static void removeDecoys(IdentificationData& id_data);
1297  };
1298 
1299 } // namespace OpenMS
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:470
String toString() const
returns the peptide as string with modifications embedded in brackets
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
A container for consensus elements.
Definition: ConsensusMap.h:92
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:59
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:388
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:64
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
A method or algorithm argument contains illegal values.
Definition: Exception.h:650
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:341
Invalid value exception.
Definition: Exception.h:329
Not all required information provided.
Definition: Exception.h:188
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:382
Int max_cleavages_
Definition: IDFilter.h:386
EnzymaticDigestion & digestion_
Definition: IDFilter.h:384
PeptideHit argument_type
Definition: IDFilter.h:389
Int min_cleavages_
Definition: IDFilter.h:385
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:401
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:411
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:390
static Int disabledValue()
Definition: IDFilter.h:394
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:79
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:782
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:525
static void filterPeptidesByLength(std::vector< PeptideIdentification > &peptides, Size min_length, Size max_length=UINT_MAX)
Filters peptide identifications according to peptide sequence length.
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1053
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e....
static void annotateBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1113
static void removeUnreferencedProteins(ProteinIdentification &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void removeDuplicatePeptideHits(std::vector< PeptideIdentification > &peptides, bool seq_only=false)
Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID).
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:90
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:553
static void removeEmptyIdentifications(MapType &prot_and_pep_ids)
Definition: IDFilter.h:1090
static void annotateBestPerPeptidePerRun(const std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1141
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:511
static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP &best_peps_per_run, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1155
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:89
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:770
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:544
static void extractPeptideSequences(const std::vector< PeptideIdentification > &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:871
static void removePeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Removes all peptide hits that have at least one of the given modifications.
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:518
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
static void keepPeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Keeps only peptide hits that have at least one of the given modifications.
static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP &best_pep, PeptideIdentification &pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1181
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:563
static void keepBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptid...
Definition: IDFilter.h:1097
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static void filterPeptidesByCharge(std::vector< PeptideIdentification > &peptides, Int min_charge, Int max_charge)
Filters peptide identifications according to charge state.
static void filterPeptidesByMZError(std::vector< PeptideIdentification > &peptides, double mass_error, bool unit_ppm)
Filter peptide identifications according to mass deviation.
static void updateProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:680
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:842
static void keepNBestPeptideHits(MapType &map, Size n)
Filters a Consensus/FeatureMap by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1078
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:534
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:573
static void filterPeptidesByRT(std::vector< PeptideIdentification > &peptides, double min_rt, double max_rt)
Filters peptide identifications by precursor RT, keeping only IDs in the given range.
static void filterPeptideEvidences(Filter &filter, std::vector< PeptideIdentification > &peptides)
Digest a collection of proteins and filter PeptideEvidences based on specificity PeptideEvidences of ...
static void removePeptidesWithMatchingRegEx(std::vector< PeptideIdentification > &peptides, const String &regex)
static void removePeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &bad_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that matches one in bad_peptides.
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:590
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:614
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:701
static void keepBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1132
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1236
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:888
static void keepBestPerPeptidePerRun(std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1104
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > &accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:905
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void keepNBestSpectra(std::vector< PeptideIdentification > &peptides, Size n)
static void filterPeptidesByMZ(std::vector< PeptideIdentification > &peptides, double min_mz, double max_mz)
Filters peptide identifications by precursor m/z, keeping only IDs in the given range.
static void keepUniquePeptidesPerProtein(std::vector< PeptideIdentification > &peptides)
Removes all peptides that are not annotated as unique for a protein (by PeptideIndexer)
static void keepPeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &good_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that does not match one in good_peptides.
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void annotateBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1168
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:1034
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:88
Definition: IdentificationData.h:113
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:72
Iterator begin()
Definition: MSExperiment.h:182
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:103
Iterator end()
Definition: MSExperiment.h:192
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
Representation of a peptide evidence.
Definition: PeptideEvidence.h:51
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
Representation of a peptide hit.
Definition: PeptideHit.h:57
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Int getCharge() const
returns the charge of the peptide
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
void sort()
Sorts the hits by score.
bool isHigherScoreBetter() const
returns the peptide score orientation
Class for the enzymatic digestion of proteins represented as AASequence or String.
Definition: ProteaseDigestion.h:60
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Representation of a protein hit.
Definition: ProteinHit.h:60
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:76
A more convenient string class.
Definition: String.h:60
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:72
String identifier
Definition: FASTAFile.h:73
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:423
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:432
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:427
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:464
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:437
bool ignore_missed_cleavages_
Definition: IDFilter.h:429
PeptideEvidence argument_type
Definition: IDFilter.h:424
ProteaseDigestion & digestion_
Definition: IDFilter.h:428
bool methionine_cleavage_
Definition: IDFilter.h:430
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:297
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:299
GetMatchingItems()
Definition: IDFilter.h:310
ItemMap items
Definition: IDFilter.h:300
HitType argument_type
Definition: IDFilter.h:298
bool exists(const HitType &hit) const
Definition: IDFilter.h:319
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:329
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:302
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:314
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:324
Is this a decoy hit?
Definition: IDFilter.h:201
bool operator()(const HitType &hit) const
Definition: IDFilter.h:210
HitType argument_type
Definition: IDFilter.h:202
HasDecoyAnnotation()
Definition: IDFilter.h:206
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:101
bool operator()(const HitType &hit) const
Definition: IDFilter.h:111
double score
Definition: IDFilter.h:104
HitType argument_type
Definition: IDFilter.h:102
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:107
bool higher_score_better
Definition: IDFilter.h:105
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:225
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:230
HitType argument_type
Definition: IDFilter.h:226
const std::unordered_set< String > & accessions
Definition: IDFilter.h:228
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:234
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:249
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:244
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:261
HitType argument_type
Definition: IDFilter.h:262
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:270
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:285
const std::set< String > & accessions
Definition: IDFilter.h:264
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:266
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:280
Does a meta value of this hit have at most the given value?
Definition: IDFilter.h:180
bool operator()(const HitType &hit) const
Definition: IDFilter.h:190
HasMaxMetaValue(const String &key_, const double &value_)
Definition: IDFilter.h:186
HitType argument_type
Definition: IDFilter.h:181
String key
Definition: IDFilter.h:183
double value
Definition: IDFilter.h:184
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:127
bool operator()(const HitType &hit) const
Definition: IDFilter.h:140
HitType argument_type
Definition: IDFilter.h:128
Size rank
Definition: IDFilter.h:130
HasMaxRank(Size rank_)
Definition: IDFilter.h:132
Is a meta value with given key and value set on this hit?
Definition: IDFilter.h:157
bool operator()(const HitType &hit) const
Definition: IDFilter.h:167
DataValue value
Definition: IDFilter.h:161
HitType argument_type
Definition: IDFilter.h:158
HasMetaValue(const String &key_, const DataValue &value_)
Definition: IDFilter.h:163
String key
Definition: IDFilter.h:160
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:478
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:481
IdentificationType argument_type
Definition: IDFilter.h:479
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:46