OpenMS
IDBoostGraph.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Julianus Pfeuffer $
32 // $Authors: Julianus Pfeuffer $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 // define to get timings for connected components
38 //#define INFERENCE_BENCH
39 
40 #include <OpenMS/ANALYSIS/ID/MessagePasserFactory.h> //included in BPI
41 #include <OpenMS/CONCEPT/Types.h>
46 
47 #include <vector>
48 #include <unordered_map>
49 #include <queue>
50 
51 #include <boost/function.hpp>
52 #include <boost/blank.hpp>
53 #include <boost/serialization/strong_typedef.hpp>
54 #include <boost/graph/adjacency_list.hpp>
55 #include <boost/graph/depth_first_search.hpp>
56 #include <boost/graph/filtered_graph.hpp>
57 #include <boost/graph/properties.hpp>
58 #include <boost/variant.hpp>
59 #include <boost/variant/detail/hash_variant.hpp>
60 #include <boost/variant/static_visitor.hpp>
61 
62 namespace OpenMS
63 {
64  struct ScoreToTgtDecLabelPairs;
65 
66  namespace Internal
67  {
68 
81  //TODO Add OPENMS_DLLAPI everywhere
82  class OPENMS_DLLAPI IDBoostGraph
83  {
84 
85  public:
86 
87  // boost has a weird extra semicolon in their strong typedef
88  #pragma clang diagnostic push
89  #pragma clang diagnostic ignored "-Wextra-semi"
90 
92  BOOST_STRONG_TYPEDEF(boost::blank, PeptideCluster);
93 
/// Lightweight graph node standing in for a group of indistinguishable
/// proteins. All members start at zero.
struct ProteinGroup
{
  int size{0};        ///< group size
  int tgts{0};        ///< number of targets in the group
  double score{0.0};  ///< score of the group
};
101 
104 
107 
109  BOOST_STRONG_TYPEDEF(int, Charge);
110 
111  #pragma clang diagnostic pop
112 
113  //typedefs
114  //TODO rename ProteinGroup type since it collides with the actual OpenMS ProteinGroup
115  typedef boost::variant<ProteinHit*, ProteinGroup, PeptideCluster, Peptide, RunIndex, Charge, PeptideHit*> IDPointer;
116  typedef boost::variant<const ProteinHit*, const ProteinGroup*, const PeptideCluster*, const Peptide, const RunIndex, const Charge, const PeptideHit*> IDPointerConst;
117  //TODO check the impact of different data structures to store nodes/edges
118  // Directed graphs would make the internal computations much easier (less in/out edge checking) but boost
119  // does not allow computation of "non-strongly" connected components for directed graphs, which is what we would
120  // need. We can think about after/while copying to CCs, to insert it into a directed graph!
121  typedef boost::adjacency_list <boost::setS, boost::vecS, boost::undirectedS, IDPointer> Graph;
122  typedef std::vector<Graph> Graphs;
123  typedef boost::adjacency_list <boost::setS, boost::vecS, boost::undirectedS, IDPointer> GraphConst;
124 
125  typedef boost::graph_traits<Graph>::vertex_descriptor vertex_t;
126  typedef boost::graph_traits<Graph>::edge_descriptor edge_t;
127 
128  typedef std::set<IDBoostGraph::vertex_t> ProteinNodeSet;
129  typedef std::set<IDBoostGraph::vertex_t> PeptideNodeSet;
130 
131 
134  public boost::default_dfs_visitor
135  {
136  public:
138  : gs(vgs), curr_v(0), next_v(0), m()
139  {}
140 
141  template < typename Vertex, typename Graph >
142  void start_vertex(Vertex u, const Graph & tg)
143  {
144  gs.emplace_back();
145  next_v = boost::add_vertex(tg[u], gs.back());
146  m[u] = next_v;
147  }
148 
149  template < typename Vertex, typename Graph >
150  void discover_vertex(Vertex /*u*/, const Graph & /*tg*/)
151  {
152  curr_v = next_v;
153  }
154 
155  template < typename Edge, typename Graph >
156  void examine_edge(Edge e, const Graph & tg)
157  {
158  if (m.find(e.m_target) == m.end())
159  {
160  next_v = boost::add_vertex(tg[e.m_target], gs.back());
161  m[e.m_target] = next_v;
162  }
163  else
164  {
165  next_v = m[e.m_target];
166  }
167 
168  boost::add_edge(m[e.m_source], next_v, gs.back());
169  }
170 
172  vertex_t curr_v, next_v;
174  std::map<vertex_t, vertex_t> m;
175  };
176 
180  public boost::static_visitor<OpenMS::String>
181  {
182  public:
183 
185  {
186  return pep->getSequence().toString() + "_" + pep->getCharge();
187  }
188 
190  {
191  return prot->getAccession();
192  }
193 
194  OpenMS::String operator()(const ProteinGroup& /*protgrp*/) const
195  {
196  return "PG";
197  }
198 
199  OpenMS::String operator()(const PeptideCluster& /*pc*/) const
200  {
201  return "PepClust";
202  }
203 
204  OpenMS::String operator()(const Peptide& peptide) const
205  {
206  return peptide;
207  }
208 
209  OpenMS::String operator()(const RunIndex& ri) const
210  {
211  return "rep" + String(ri);
212  }
213 
214  OpenMS::String operator()(const Charge& chg) const
215  {
216  return "chg" + String(chg);
217  }
218 
219  };
220 
223  template<class CharT>
225  public boost::static_visitor<>
226  {
227  public:
228 
229  explicit PrintAddressVisitor(std::basic_ostream<CharT> stream):
230  stream_(stream)
231  {}
232 
233  void operator()(PeptideHit* pep) const
234  {
235  stream_ << pep->getSequence().toUnmodifiedString() << ": " << pep << std::endl;
236  }
237 
238  void operator()(ProteinHit* prot) const
239  {
240  stream_ << prot->getAccession() << ": " << prot << std::endl;
241  }
242 
243  void operator()(const ProteinGroup& /*protgrp*/) const
244  {
245  stream_ << "PG" << std::endl;
246  }
247 
248  void operator()(const PeptideCluster& /*pc*/) const
249  {
250  stream_ << "PepClust" << std::endl;
251  }
252 
253  void operator()(const Peptide& peptide) const
254  {
255  stream_ << peptide << std::endl;
256  }
257 
258  void operator()(const RunIndex& ri) const
259  {
260  stream_ << "rep" << ri << std::endl;
261  }
262 
263  void operator()(const Charge& chg) const
264  {
265  stream_ << "chg" << chg << std::endl;
266  }
267 
268  std::basic_ostream<CharT> stream_;
269  };
270 
275  public boost::static_visitor<>
276  {
277  public:
278 
279  void operator()(PeptideHit* pep, double posterior) const
280  {
281  pep->setScore(posterior);
282  }
283 
284  void operator()(ProteinHit* prot, double posterior) const
285  {
286  prot->setScore(posterior);
287  }
288 
289  void operator()(ProteinGroup& pg, double posterior) const
290  {
291  pg.score = posterior;
292  }
293 
294  // Everything else, do nothing for now
295  template <class T>
296  void operator()(T& /*any node type*/, double /*posterior*/) const
297  {
298  // do nothing
299  }
300 
301  };
302 
306  public boost::static_visitor<double>
307  {
308  public:
309 
310  double operator()(PeptideHit* pep) const
311  {
312  return pep->getScore();
313  }
314 
315  double operator()(ProteinHit* prot) const
316  {
317  return prot->getScore();
318  }
319 
320  double operator()(ProteinGroup& pg) const
321  {
322  return pg.score;
323  }
324 
325  // Everything else, do nothing for now
326  template <class T>
327  double operator()(T& /*any node type*/) const
328  {
329  return -1.0;
330  }
331 
332  };
333 
338  public boost::static_visitor<std::pair<double,bool>>
339  {
340  public:
341 
342  std::pair<double,bool> operator()(PeptideHit* pep) const
343  {
344  return {pep->getScore(), pep->getMetaValue("target_decoy").toString()[0] == 't'};
345  }
346 
347  std::pair<double,bool> operator()(ProteinHit* prot) const
348  {
349  return {prot->getScore(), prot->getMetaValue("target_decoy").toString()[0] == 't'};
350  }
351 
352  std::pair<double,bool> operator()(ProteinGroup& pg) const
353  {
354  return {pg.score, pg.tgts > 0};
355  }
356 
357  // Everything else, do nothing for now
358  template <class T>
359  std::pair<double,bool> operator()(T& /*any node type*/) const
360  {
361  return {-1.0, false};
362  }
363  };
364 
367  std::vector<PeptideIdentification>& idedSpectra,
368  Size use_top_psms,
369  bool use_run_info,
370  bool best_psms_annotated,
371  const std::optional<const ExperimentalDesign>& ed = std::optional<const ExperimentalDesign>());
372 
374  ConsensusMap& cmap,
375  Size use_top_psms,
376  bool use_run_info,
377  bool use_unassigned_ids,
378  bool best_psms_annotated,
379  const std::optional<const ExperimentalDesign>& ed = std::optional<const ExperimentalDesign>());
380 
381 
382  //TODO think about templating to avoid wrapping to std::function
383  // although we usually do long-running tasks per CC such that the extra virtual call does not matter much
384  // Instead we gain type erasure.
386  void applyFunctorOnCCs(const std::function<unsigned long(Graph&, unsigned int)>& functor);
388  void applyFunctorOnCCsST(const std::function<void(Graph&)>& functor);
389 
393 
394  //TODO create a new class for an extended Graph and try to reuse as much as possible
395  // use inheritance or templates
399 
406  void annotateIndistProteins(bool addSingletons = true);
407 
411  void calculateAndAnnotateIndistProteins(bool addSingletons = true);
412 
415 
422  void resolveGraphPeptideCentric(bool removeAssociationsInData = true);
423 
424 
425 
428 
432  const Graph& getComponent(Size cc);
433 
437 
438  //TODO docu
439  //void buildExtendedGraph(bool use_all_psms, std::pair<int,int> chargeRange, unsigned int nrReplicates);
440 
444  static void printGraph(std::ostream& out, const Graph& fg);
445 
454  void getUpstreamNodesNonRecursive(std::queue<vertex_t>& q, const Graph& graph, int lvl,
455  bool stop_at_first, std::vector<vertex_t>& result);
456 
465  void getDownstreamNodesNonRecursive(std::queue<vertex_t>& q, const Graph& graph, int lvl,
466  bool stop_at_first, std::vector<vertex_t>& result);
467 
476 
477  private:
478 
480 
481  struct SequenceToReplicateChargeVariantHierarchy;
482 
483 
484  //TODO introduce class hierarchy:
485  /*
486  * IDGraph<UnderlyingIDStruc>
487  *
488  * - BasicGraph<>
489  * - ExtendedGraphClustered<>
490  * - ExtendedGraphClusteredWithRunInfo<>
491  *
492  * in theory extending a basic one is desirable to create the extended one. But it means we have to
493  * copy/move the graph (node by node) because the nodes are of a broader boost::variant type. So we probably have to
494  * duplicate code and offer a from-scratch step-wise building for the extended graph, too.
495  * Note that there could be several levels of extension in the future. For now I keep everything in one
496  * class by having potential storage for the broadest extended type. Differences in the underlying ID structure
497  * e.g. ConsensusMap or PeptideIDs from idXML currently only have an effect during building, so I just overload
498  * the constructors. In theory it would be nice to generalize on that, too, especially when we adapt to the new
499  * ID data structure.
500  */
501 
502 
503  /* ---------------- Either of them is used, preferably second --------------- */
506 
509  /* ---------------------------------------------------------------------------- */
510 
511  #ifdef INFERENCE_BENCH
513  std::vector<std::tuple<vertex_t, vertex_t, unsigned long, double>> sizes_and_times_{1};
514  #endif
515 
516 
517  /* ---- Only used when run information was available --------- */
518 
519  //TODO think about preallocating it, but the number of peptide hits is not easily computed
520  // since they are inside the pepIDs
521  //TODO would multiple sets be better?
522 
525  std::unordered_map<vertex_t, Size> pepHitVtx_to_run_;
526 
531  Size nrPrefractionationGroups_ = 0;
532 
533  /* ----------------------------------------------------------- */
534 
535 
538  vertex_t addVertexWithLookup_(const IDPointer& ptr, std::unordered_map<IDPointer, vertex_t, boost::hash<IDPointer>>& vertex_map);
539  //vertex_t addVertexWithLookup_(IDPointerConst& ptr, std::unordered_map<IDPointerConst, vertex_t, boost::hash<IDPointerConst>>& vertex_map);
540 
541 
543  void annotateIndistProteins_(const Graph& fg, bool addSingletons);
544  void calculateAndAnnotateIndistProteins_(const Graph& fg, bool addSingletons);
545 
556  std::vector<PeptideIdentification>& idedSpectra,
557  Size use_top_psms,
558  bool best_psms_annotated = false);
559 
561  ConsensusMap& cmap,
562  Size use_top_psms,
563  bool use_unassigned_ids,
564  bool best_psms_annotated = false);
565 
568  PeptideIdentification& spectrum,
569  std::unordered_map<IDPointer, vertex_t, boost::hash<IDPointer>>& vertex_map,
570  const std::unordered_map<std::string, ProteinHit*>& accession_map,
571  Size use_top_psms,
572  bool best_psms_annotated);
573 
575  PeptideIdentification& spectrum,
576  std::unordered_map<unsigned, unsigned>& indexToPrefractionationGroup,
577  std::unordered_map<IDPointer, vertex_t, boost::hash<IDPointer>>& vertex_map,
578  std::unordered_map<std::string, ProteinHit*>& accession_map,
579  Size use_top_psms
580  );
581 
589  ConsensusMap& cmap,
590  Size use_top_psms,
591  bool use_unassigned_ids,
592  const ExperimentalDesign& ed);
593 
595  std::vector<PeptideIdentification>& idedSpectra,
596  Size use_top_psms,
597  const ExperimentalDesign& ed);
598 
599 
601  void resolveGraphPeptideCentric_(Graph& fg, bool removeAssociationsInData);
602 
603  template<class NodeType>
604  void getDownstreamNodes(const vertex_t& start, const Graph& graph, std::vector<NodeType>& result)
605  {
606  Graph::adjacency_iterator adjIt, adjIt_end;
607  boost::tie(adjIt, adjIt_end) = boost::adjacent_vertices(start, graph);
608  for (;adjIt != adjIt_end; ++adjIt)
609  {
610  if (graph[*adjIt].type() == typeid(NodeType))
611  {
612  result.emplace_back(boost::get<NodeType>(graph[*adjIt]));
613  }
614  else if (graph[*adjIt].which() > graph[start].which())
615  {
616  getDownstreamNodes(*adjIt, graph, result);
617  }
618  }
619  }
620 
621  template<class NodeType>
622  void getUpstreamNodes(const vertex_t& start, const Graph graph, std::vector<NodeType>& result)
623  {
624  Graph::adjacency_iterator adjIt, adjIt_end;
625  boost::tie(adjIt, adjIt_end) = boost::adjacent_vertices(start, graph);
626  for (;adjIt != adjIt_end; ++adjIt)
627  {
628  if (graph[*adjIt].type() == typeid(NodeType))
629  {
630  result.emplace_back(boost::get<NodeType>(graph[*adjIt]));
631  }
632  else if (graph[*adjIt].which() < graph[start].which())
633  {
634  getUpstreamNodes(*adjIt, graph, result);
635  }
636  }
637  }
638  };
639 
641  } //namespace Internal
642 } //namespace OpenMS
643 
String toString() const
returns the peptide as string with modifications embedded in brackets
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
A container for consensus elements.
Definition: ConsensusMap.h:92
String toString(bool full_precision=true) const
Conversion to String full_precision Controls number of fractional digits for all double types or list...
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition: ExperimentalDesign.h:245
Visits nodes in the boost graph (either ptrs to an ID Object or some lightweight surrogates) and depe...
Definition: IDBoostGraph.h:307
double operator()(PeptideHit *pep) const
Definition: IDBoostGraph.h:310
double operator()(ProteinHit *prot) const
Definition: IDBoostGraph.h:315
double operator()(T &) const
Definition: IDBoostGraph.h:327
double operator()(ProteinGroup &pg) const
Definition: IDBoostGraph.h:320
Visits nodes in the boost graph (either ptrs to an ID Object or some lightweight surrogates) and depe...
Definition: IDBoostGraph.h:339
std::pair< double, bool > operator()(T &) const
Definition: IDBoostGraph.h:359
std::pair< double, bool > operator()(ProteinGroup &pg) const
Definition: IDBoostGraph.h:352
std::pair< double, bool > operator()(PeptideHit *pep) const
Definition: IDBoostGraph.h:342
std::pair< double, bool > operator()(ProteinHit *prot) const
Definition: IDBoostGraph.h:347
Visits nodes in the boost graph (ptrs to an ID Object) and depending on their type creates a label e....
Definition: IDBoostGraph.h:181
OpenMS::String operator()(const Peptide &peptide) const
Definition: IDBoostGraph.h:204
OpenMS::String operator()(const Charge &chg) const
Definition: IDBoostGraph.h:214
OpenMS::String operator()(const PeptideHit *pep) const
Definition: IDBoostGraph.h:184
OpenMS::String operator()(const ProteinGroup &) const
Definition: IDBoostGraph.h:194
OpenMS::String operator()(const RunIndex &ri) const
Definition: IDBoostGraph.h:209
OpenMS::String operator()(const ProteinHit *prot) const
Definition: IDBoostGraph.h:189
OpenMS::String operator()(const PeptideCluster &) const
Definition: IDBoostGraph.h:199
Visits nodes in the boost graph (ptrs to an ID Object) and depending on their type prints the address...
Definition: IDBoostGraph.h:226
void operator()(const Charge &chg) const
Definition: IDBoostGraph.h:263
std::basic_ostream< CharT > stream_
Definition: IDBoostGraph.h:268
PrintAddressVisitor(std::basic_ostream< CharT > stream)
Definition: IDBoostGraph.h:229
void operator()(const PeptideCluster &) const
Definition: IDBoostGraph.h:248
void operator()(const RunIndex &ri) const
Definition: IDBoostGraph.h:258
void operator()(PeptideHit *pep) const
Definition: IDBoostGraph.h:233
void operator()(ProteinHit *prot) const
Definition: IDBoostGraph.h:238
void operator()(const Peptide &peptide) const
Definition: IDBoostGraph.h:253
void operator()(const ProteinGroup &) const
Definition: IDBoostGraph.h:243
Visits nodes in the boost graph (either ptrs to an ID Object or some lightweight surrogates) and depe...
Definition: IDBoostGraph.h:276
void operator()(T &, double) const
Definition: IDBoostGraph.h:296
void operator()(PeptideHit *pep, double posterior) const
Definition: IDBoostGraph.h:279
void operator()(ProteinGroup &pg, double posterior) const
Definition: IDBoostGraph.h:289
void operator()(ProteinHit *prot, double posterior) const
Definition: IDBoostGraph.h:284
A boost dfs visitor that copies connected components into a vector of graphs.
Definition: IDBoostGraph.h:135
std::map< vertex_t, vertex_t > m
A mapping from old node id to new node id to not duplicate existing ones in the new graph.
Definition: IDBoostGraph.h:174
void start_vertex(Vertex u, const Graph &tg)
Definition: IDBoostGraph.h:142
void examine_edge(Edge e, const Graph &tg)
Definition: IDBoostGraph.h:156
dfs_ccsplit_visitor(Graphs &vgs)
Definition: IDBoostGraph.h:137
vertex_t curr_v
Definition: IDBoostGraph.h:172
void discover_vertex(Vertex, const Graph &)
Definition: IDBoostGraph.h:150
Graphs & gs
Definition: IDBoostGraph.h:171
Creates and maintains a boost graph based on the OpenMS ID datastructures.
Definition: IDBoostGraph.h:83
void addPeptideIDWithAssociatedProteins_(PeptideIdentification &spectrum, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map, const std::unordered_map< std::string, ProteinHit * > &accession_map, Size use_top_psms, bool best_psms_annotated)
Used during building.
const ProteinIdentification & getProteinIDs()
Returns the underlying protein identifications for viewing.
void buildGraphWithRunInfo_(ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms, const ExperimentalDesign &ed)
void buildGraph_(ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms, bool best_psms_annotated=false)
boost::graph_traits< Graph >::vertex_descriptor vertex_t
Definition: IDBoostGraph.h:125
BOOST_STRONG_TYPEDEF(boost::blank, PeptideCluster)
placeholder for peptides with the same parent proteins or protein groups
BOOST_STRONG_TYPEDEF(int, Charge)
in which charge state a PSM was observed
std::unordered_map< vertex_t, Size > pepHitVtx_to_run_
Definition: IDBoostGraph.h:525
boost::variant< const ProteinHit *, const ProteinGroup *, const PeptideCluster *, const Peptide, const RunIndex, const Charge, const PeptideHit * > IDPointerConst
Definition: IDBoostGraph.h:116
void addPeptideAndAssociatedProteinsWithRunInfo_(PeptideIdentification &spectrum, std::unordered_map< unsigned, unsigned > &indexToPrefractionationGroup, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map, std::unordered_map< std::string, ProteinHit * > &accession_map, Size use_top_psms)
std::vector< Graph > Graphs
Definition: IDBoostGraph.h:122
double score
Definition: IDBoostGraph.h:99
IDBoostGraph(ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_run_info, bool use_unassigned_ids, bool best_psms_annotated, const std::optional< const ExperimentalDesign > &ed=std::optional< const ExperimentalDesign >())
void getDownstreamNodes(const vertex_t &start, const Graph &graph, std::vector< NodeType > &result)
Definition: IDBoostGraph.h:604
ProteinIdentification & protIDs_
Definition: IDBoostGraph.h:479
void getUpstreamNodes(const vertex_t &start, const Graph graph, std::vector< NodeType > &result)
Definition: IDBoostGraph.h:622
void computeConnectedComponents()
Splits the initialized graph into connected components and clears it.
void getProteinGroupScoresAndHitchhikingTgtFraction(ScoreToTgtDecLabelPairs &scores_and_tgt_fraction)
int tgts
Definition: IDBoostGraph.h:98
Size getNrConnectedComponents()
Zero means the graph was not split yet.
void resolveGraphPeptideCentric_(Graph &fg, bool removeAssociationsInData)
see equivalent public method
void getUpstreamNodesNonRecursive(std::queue< vertex_t > &q, const Graph &graph, int lvl, bool stop_at_first, std::vector< vertex_t > &result)
Searches for all upstream nodes from a (set of) start nodes that are lower or equal than a given leve...
const Graph & getComponent(Size cc)
Returns a specific connected component of the graph as a graph itself.
void applyFunctorOnCCsST(const std::function< void(Graph &)> &functor)
Do something on connected components, single-threaded (your functor object has to be convertible to std::function, e.g. a lambda)
Graph g
the initial boost Graph (will be cleared when split into CCs)
Definition: IDBoostGraph.h:481
void annotateIndistProteins_(const Graph &fg, bool addSingletons)
internal function to annotate the underlying ID structures based on the given Graph
void clusterIndistProteinsAndPeptidesAndExtendGraph()
std::set< IDBoostGraph::vertex_t > PeptideNodeSet
Definition: IDBoostGraph.h:129
std::set< IDBoostGraph::vertex_t > ProteinNodeSet
Definition: IDBoostGraph.h:128
void buildGraphWithRunInfo_(ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_unassigned_ids, const ExperimentalDesign &ed)
boost::adjacency_list< boost::setS, boost::vecS, boost::undirectedS, IDPointer > GraphConst
Definition: IDBoostGraph.h:123
void calculateAndAnnotateIndistProteins(bool addSingletons=true)
static void printGraph(std::ostream &out, const Graph &fg)
Prints a graph (component or if not split, the full graph) in graphviz (i.e. dot) format.
void calculateAndAnnotateIndistProteins_(const Graph &fg, bool addSingletons)
boost::graph_traits< Graph >::edge_descriptor edge_t
Definition: IDBoostGraph.h:126
BOOST_STRONG_TYPEDEF(String, Peptide)
a (currently unmodified) peptide sequence
void annotateIndistProteins(bool addSingletons=true)
BOOST_STRONG_TYPEDEF(Size, RunIndex)
in which run a PSM was observed
void resolveGraphPeptideCentric(bool removeAssociationsInData=true)
IDBoostGraph(ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms, bool use_run_info, bool best_psms_annotated, const std::optional< const ExperimentalDesign > &ed=std::optional< const ExperimentalDesign >())
Constructors.
void getProteinGroupScoresAndTgtFraction(ScoreToTgtDecLabelPairs &scores_and_tgt_fraction)
void buildGraph_(ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_unassigned_ids, bool best_psms_annotated=false)
void getProteinScores_(ScoreToTgtDecLabelPairs &scores_and_tgt)
boost::adjacency_list< boost::setS, boost::vecS, boost::undirectedS, IDPointer > Graph
Definition: IDBoostGraph.h:121
void getDownstreamNodesNonRecursive(std::queue< vertex_t > &q, const Graph &graph, int lvl, bool stop_at_first, std::vector< vertex_t > &result)
Searches for all downstream nodes from a (set of) start nodes that are higher or equal than a given l...
boost::variant< ProteinHit *, ProteinGroup, PeptideCluster, Peptide, RunIndex, Charge, PeptideHit * > IDPointer
Definition: IDBoostGraph.h:115
void applyFunctorOnCCs(const std::function< unsigned long(Graph &, unsigned int)> &functor)
Do something on connected components (your functor object has to be convertible to std::function, e.g. a lambda)
Graphs ccs_
the Graph split into connected components
Definition: IDBoostGraph.h:508
vertex_t addVertexWithLookup_(const IDPointer &ptr, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map)
indistinguishable protein groups (size, nr targets, score)
Definition: IDBoostGraph.h:96
const DataValue & getMetaValue(const String &name) const
Returns the value corresponding to a string, or DataValue::EMPTY if not found.
Representation of a peptide hit.
Definition: PeptideHit.h:57
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence
Int getCharge() const
returns the charge of the peptide
void setScore(double score)
sets the PSM score
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
Representation of a protein hit.
Definition: ProteinHit.h:60
double getScore() const
returns the score of the protein hit
void setScore(const double score)
sets the score of the protein hit
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:76
A more convenient string class.
Definition: String.h:60
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
Definition: IDScoreGetterSetter.h:57