OpenMS
NASequence.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Samuel Wein $
32 // $Authors: Samuel Wein, Timo Sachsenberg, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
39 #include <OpenMS/CONCEPT/Types.h>
41 #include <iosfwd>
42 #include <vector>
43 
44 namespace OpenMS
45 {
59  class OPENMS_DLLAPI NASequence
60  {
65  public:
67  { //< NB: Not all fragments types are valid for all residue types, this class should probably get split
68  Full = 0,
72  AIon,
73  BIon,
74  CIon,
75  XIon,
76  YIon,
77  ZIon,
85  WIon,
87  DIon,
88  SizeOfNASFragmentType
89  };
90 
92 
93  class Iterator;
94 
100  class OPENMS_DLLAPI ConstIterator
101  {
102  public:
104  typedef const value_type& const_reference;
106  typedef const value_type* const_pointer;
107  typedef std::vector<const value_type*>::difference_type difference_type;
108  typedef const value_type* pointer;
109  typedef std::random_access_iterator_tag iterator_category;
110 
115  ConstIterator() = default;
116 
118  ConstIterator(const std::vector<const Ribonucleotide*>* vec_ptr, difference_type position)
119  {
120  vector_ = vec_ptr;
121  position_ = position;
122  }
123 
125  ConstIterator(const ConstIterator& rhs) : vector_(rhs.vector_), position_(rhs.position_)
126  {
127  }
128 
130  ConstIterator(const NASequence::Iterator& rhs) : vector_(rhs.vector_), position_(rhs.position_)
131  {
132  }
133 
135  virtual ~ConstIterator()
136  {
137  }
138 
140 
143  {
144  if (this != &rhs)
145  {
146  position_ = rhs.position_;
147  vector_ = rhs.vector_;
148  }
149  return *this;
150  }
151 
157  {
158  return *(*vector_)[position_];
159  }
160 
163  {
164  return (*vector_)[position_];
165  }
166 
169  {
170  return ConstIterator(vector_, position_ + diff);
171  }
172 
174  {
175  return position_ - rhs.position_;
176  }
177 
180  {
181  return ConstIterator(vector_, position_ - diff);
182  }
183 
185  bool operator==(const ConstIterator& rhs) const
186  {
187  return (std::tie(vector_, position_) == std::tie(rhs.vector_, rhs.position_));
188  }
189 
191  bool operator!=(const ConstIterator& rhs) const
192  {
193  return !(operator==(rhs));
194  }
195 
198  {
199  ++position_;
200  return *this;
201  }
202 
205  {
206  --position_;
207  return *this;
208  }
209 
211 
212  protected:
213  // pointer to the vector
214  const std::vector<const Ribonucleotide*>* vector_;
215 
216  // position in the vector
218  };
219 
220 
226  class OPENMS_DLLAPI Iterator
227  {
228  public:
230 
232  typedef const value_type& const_reference;
234  typedef const value_type* const_pointer;
235  typedef const value_type* pointer;
236  typedef std::vector<const value_type*>::difference_type difference_type;
237 
241  Iterator() = default;
242 
244  Iterator(std::vector<const Ribonucleotide*>* vec_ptr, difference_type position)
245  {
246  vector_ = vec_ptr;
247  position_ = position;
248  }
249 
251  Iterator(const Iterator& rhs) : vector_(rhs.vector_), position_(rhs.position_)
252  {
253  }
254 
256  virtual ~Iterator()
257  {
258  }
259 
261 
264  {
265  if (this != &rhs)
266  {
267  position_ = rhs.position_;
268  vector_ = rhs.vector_;
269  }
270  return *this;
271  }
272 
278  {
279  return *(*vector_)[position_];
280  }
281 
284  {
285  return (*vector_)[position_];
286  }
287 
290  {
291  return (*vector_)[position_];
292  }
293 
296  {
297  return Iterator(vector_, position_ + diff);
298  }
299 
301  {
302  return position_ - rhs.position_;
303  }
304 
307  {
308  return Iterator(vector_, position_ - diff);
309  }
310 
312  bool operator==(const Iterator& rhs) const
313  {
314  return (std::tie(vector_, position_) == std::tie(rhs.vector_, rhs.position_));
315  }
316 
318  bool operator!=(const Iterator& rhs) const
319  {
320  return !this->operator==(rhs);
321  }
322 
325  {
326  ++position_;
327  return *this;
328  }
329 
332  {
333  --position_;
334  return *this;
335  }
336 
338 
339  protected:
340  std::vector<const Ribonucleotide*>* vector_;
341 
342  // position in the vector
344  };
345 
346  public:
347  /*
348  * Default constructors and assignment operators.
349  */
350  NASequence() = default;
351  NASequence(const NASequence&) = default;
352  NASequence(NASequence&&) = default;
353  NASequence& operator=(const NASequence&) & = default;
354  NASequence& operator=(NASequence&&) & = default;
355 
357  NASequence(std::vector<const Ribonucleotide*> s, const RibonucleotideChainEnd* five_prime, const RibonucleotideChainEnd* three_prime);
358 
359  virtual ~NASequence() = default;
360 
361  bool operator==(const NASequence& rhs) const;
362  bool operator!=(const NASequence& rhs) const;
363  bool operator<(const NASequence& rhs) const;
364 
366  void setSequence(const std::vector<const Ribonucleotide*>& seq);
367 
368  const std::vector<const Ribonucleotide*>& getSequence() const
369  {
370  return seq_;
371  }
372 
373  std::vector<const Ribonucleotide*>& getSequence()
374  {
375  return seq_;
376  }
377 
379  void set(size_t index, const Ribonucleotide* r);
380 
381  const Ribonucleotide* get(size_t index)
382  {
383  return seq_[index];
384  }
385 
387  inline const Ribonucleotide*& operator[](size_t index)
388  {
389  return seq_[index];
390  }
391 
392  inline const Ribonucleotide* const& operator[](size_t index) const
393  {
394  return seq_[index];
395  }
396 
397  bool empty() const;
398  size_t size() const;
399  void clear();
400 
402  bool hasFivePrimeMod() const;
405  bool hasThreePrimeMod() const;
408 
410  inline Iterator begin()
411  {
412  return Iterator(&seq_, 0);
413  }
414 
415  inline ConstIterator begin() const
416  {
417  return ConstIterator(&seq_, 0);
418  }
419 
420  inline Iterator end()
421  {
422  return Iterator(&seq_, (Int)seq_.size());
423  }
424 
425  inline ConstIterator end() const
426  {
427  return ConstIterator(&seq_, (Int)seq_.size());
428  }
429 
430  inline ConstIterator cbegin() const
431  {
432  return ConstIterator(&seq_, 0);
433  }
434 
435  inline ConstIterator cend() const
436  {
437  return ConstIterator(&seq_, (Int)seq_.size());
438  }
439 
441 
450  double getMonoWeight(NASFragmentType type = Full, Int charge = 0) const;
451 
460  double getAverageWeight(NASFragmentType type = Full, Int charge = 0) const;
461 
470  EmpiricalFormula getFormula(NASFragmentType type = Full, Int charge = 0) const;
471 
479  NASequence getPrefix(Size length) const;
480 
488  NASequence getSuffix(Size length) const;
489 
498  NASequence getSubsequence(Size start = 0, Size length = Size(-1)) const;
499 
507  static NASequence fromString(const String& s);
508 
512  friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const NASequence& seq);
513 
521  static NASequence fromString(const char* s);
522 
523  std::string toString() const;
524 
525  private:
526  // TODO: query RNA / DNA depending on type
527  static void parseString_(const String& s, NASequence& nas);
528 
538  // TODO: query RNA / DNA depending on type
540 
541  std::vector<const Ribonucleotide*> seq_;
542 
543  const RibonucleotideChainEnd* five_prime_ = nullptr;
544  const RibonucleotideChainEnd* three_prime_ = nullptr;
545  };
546 
547 } // namespace OpenMS
Representation of an empirical formula.
Definition: EmpiricalFormula.h:85
ConstIterator of NASequence class.
Definition: NASequence.h:101
ConstIterator(const std::vector< const Ribonucleotide * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition: NASequence.h:118
const_pointer operator->() const
dereference operator
Definition: NASequence.h:162
const value_type * const_pointer
Definition: NASequence.h:106
const ConstIterator operator-(difference_type diff) const
backward jump operator
Definition: NASequence.h:179
ConstIterator & operator=(const ConstIterator &rhs)
assignment operator
Definition: NASequence.h:142
bool operator!=(const ConstIterator &rhs) const
inequality operator
Definition: NASequence.h:191
const ConstIterator operator+(difference_type diff) const
forward jump operator
Definition: NASequence.h:168
ConstIterator()=default
default constructor
ConstIterator & operator--()
decrement operator
Definition: NASequence.h:204
std::random_access_iterator_tag iterator_category
Definition: NASequence.h:109
bool operator==(const ConstIterator &rhs) const
equality comparator
Definition: NASequence.h:185
Ribonucleotide value_type
Definition: NASequence.h:103
ConstIterator & operator++()
increment operator
Definition: NASequence.h:197
ConstIterator(const NASequence::Iterator &rhs)
copy constructor from Iterator
Definition: NASequence.h:130
difference_type position_
Definition: NASequence.h:217
std::vector< const value_type * >::difference_type difference_type
Definition: NASequence.h:107
ConstIterator(const ConstIterator &rhs)
copy constructor
Definition: NASequence.h:125
value_type & reference
Definition: NASequence.h:105
const std::vector< const Ribonucleotide * > * vector_
Definition: NASequence.h:214
const value_type * pointer
Definition: NASequence.h:108
const_reference operator*() const
dereference operator
Definition: NASequence.h:156
virtual ~ConstIterator()
destructor
Definition: NASequence.h:135
difference_type operator-(ConstIterator rhs) const
Definition: NASequence.h:173
const value_type & const_reference
Definition: NASequence.h:104
Iterator of NASequence class.
Definition: NASequence.h:227
const_pointer operator->() const
dereference operator
Definition: NASequence.h:283
const value_type * const_pointer
Definition: NASequence.h:234
pointer operator->()
mutable dereference operator
Definition: NASequence.h:289
const Iterator operator+(difference_type diff) const
forward jump operator
Definition: NASequence.h:295
Iterator & operator--()
decrement operator
Definition: NASequence.h:331
virtual ~Iterator()
destructor
Definition: NASequence.h:256
Iterator(std::vector< const Ribonucleotide * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition: NASequence.h:244
difference_type operator-(Iterator rhs) const
Definition: NASequence.h:300
Iterator & operator=(const Iterator &rhs)
assignment operator
Definition: NASequence.h:263
Ribonucleotide value_type
Definition: NASequence.h:231
bool operator==(const Iterator &rhs) const
equality comparator
Definition: NASequence.h:312
difference_type position_
Definition: NASequence.h:343
std::vector< const value_type * >::difference_type difference_type
Definition: NASequence.h:236
value_type & reference
Definition: NASequence.h:233
Iterator(const Iterator &rhs)
copy constructor
Definition: NASequence.h:251
const value_type * pointer
Definition: NASequence.h:235
const_reference operator*() const
dereference operator
Definition: NASequence.h:277
bool operator!=(const Iterator &rhs) const
inequality operator
Definition: NASequence.h:318
const Iterator operator-(difference_type diff) const
backward jump operator
Definition: NASequence.h:306
Iterator & operator++()
increment operator
Definition: NASequence.h:324
std::vector< const Ribonucleotide * > * vector_
Definition: NASequence.h:340
const value_type & const_reference
Definition: NASequence.h:232
Representation of a nucleic acid sequence.
Definition: NASequence.h:60
ConstIterator cend() const
Definition: NASequence.h:435
virtual ~NASequence()=default
bool operator<(const NASequence &rhs) const
less operator
bool hasThreePrimeMod() const
NASequence getSuffix(Size length) const
Return sequence suffix of the given length (not start index!)
void setSequence(const std::vector< const Ribonucleotide * > &seq)
getter / setter for sequence
std::string toString() const
Iterator begin()
iterators
Definition: NASequence.h:410
size_t size() const
bool operator==(const NASequence &rhs) const
equality operator
bool hasFivePrimeMod() const
5' and 3' modifications
const std::vector< const Ribonucleotide * > & getSequence() const
Definition: NASequence.h:368
double getMonoWeight(NASFragmentType type=Full, Int charge=0) const
utility functions
friend std::ostream & operator<<(std::ostream &os, const NASequence &seq)
void setThreePrimeMod(const RibonucleotideChainEnd *r)
NASequence getPrefix(Size length) const
Return sequence prefix of the given length (not end index!)
static void parseString_(const String &s, NASequence &nas)
NASequence()=default
bool empty() const
ConstIterator end() const
Definition: NASequence.h:425
const RibonucleotideChainEnd * getThreePrimeMod() const
bool operator!=(const NASequence &rhs) const
inequality operator
NASequence(std::vector< const Ribonucleotide * > s, const RibonucleotideChainEnd *five_prime, const RibonucleotideChainEnd *three_prime)
full constructor
void set(size_t index, const Ribonucleotide *r)
getter / setter for ribonucleotide elements (easily wrapped using pyOpenMS)
static NASequence fromString(const char *s)
create NASequence object by parsing a C string (character array)
EmpiricalFormula getFormula(NASFragmentType type=Full, Int charge=0) const
Get the formula for a NASequence.
NASFragmentType
an enum of all possible fragment ion types
Definition: NASequence.h:67
@ AminusB
A ion with base loss, added for nucleic acid support.
Definition: NASequence.h:86
@ YIon
MS:1001220 peptide bond up to the C-terminus.
Definition: NASequence.h:76
@ XIon
MS:1001228 amide/C-alpha bond up to the C-terminus.
Definition: NASequence.h:75
@ ZIon
MS:1001230 C-alpha/carbonyl carbon bond.
Definition: NASequence.h:77
@ WIon
W ion, added for nucleic acid support.
Definition: NASequence.h:85
@ BIonMinusH20
MS:1001222 b ion without water.
Definition: NASequence.h:79
@ BIonMinusNH3
MS:1001232 b ion without ammonia.
Definition: NASequence.h:81
@ AIon
MS:1001229 N-terminus up to the C-alpha/carbonyl carbon bond.
Definition: NASequence.h:72
@ Precursor
MS:1001523 Precursor ion.
Definition: NASequence.h:78
@ YIonMinusH20
MS:1001223 y ion without water.
Definition: NASequence.h:80
@ NonIdentified
MS:1001240 Non-identified ion.
Definition: NASequence.h:83
@ BIon
MS:1001224 N-terminus up to the peptide bond.
Definition: NASequence.h:73
@ ThreePrime
only 3' terminus
Definition: NASequence.h:71
@ CIon
MS:1001231 N-terminus up to the amide/C-alpha bond.
Definition: NASequence.h:74
@ YIonMinusNH3
MS:1001233 y ion without ammonia.
Definition: NASequence.h:82
@ Internal
internal, without any termini
Definition: NASequence.h:69
@ Unannotated
no stored annotation
Definition: NASequence.h:84
@ FivePrime
only 5' terminus
Definition: NASequence.h:70
@ DIon
D ion, added for nucleic acid support.
Definition: NASequence.h:87
static NASequence fromString(const String &s)
create NASequence object by parsing an OpenMS string
const Ribonucleotide * get(size_t index)
Definition: NASequence.h:381
static String::ConstIterator parseMod_(const String::ConstIterator str_it, const String &str, NASequence &nas)
Parses modifications in square brackets.
NASequence & operator=(NASequence &&) &=default
Move assignment operator.
ConstIterator cbegin() const
Definition: NASequence.h:430
double getAverageWeight(NASFragmentType type=Full, Int charge=0) const
Get the Average Weight of a NASequence. NB returns the uncharged mass + or - proton masses to match t...
Iterator end()
Definition: NASequence.h:420
std::vector< const Ribonucleotide * > & getSequence()
Definition: NASequence.h:373
NASequence getSubsequence(Size start=0, Size length=Size(-1)) const
Return subsequence with given starting position and length.
const Ribonucleotide *& operator[](size_t index)
getter / setter for sequence elements (C++ container style)
Definition: NASequence.h:387
const RibonucleotideChainEnd * getFivePrimeMod() const
std::vector< const Ribonucleotide * > seq_
Definition: NASequence.h:541
void setFivePrimeMod(const RibonucleotideChainEnd *r)
ConstIterator begin() const
Definition: NASequence.h:415
const Ribonucleotide *const & operator[](size_t index) const
Definition: NASequence.h:392
NASequence & operator=(const NASequence &) &=default
Copy assignment operator.
NASequence(NASequence &&)=default
Move constructor.
NASequence(const NASequence &)=default
Copy constructor.
Representation of a ribonucleotide (modified or unmodified)
Definition: Ribonucleotide.h:52
A more convenient string class.
Definition: String.h:60
const_iterator ConstIterator
Const Iterator.
Definition: String.h:72
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48