OpenMS
StringUtilsSimple.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg, Chris Bielow $
32 // $Authors: Marc Sturm, Stephan Aiche, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Types.h>
40 
#include <algorithm>
#include <cctype>
#include <cmath>
#include <random>
#include <sstream>
#include <string>
#include <vector>
46 
47 namespace OpenMS
48 {
49  class String;
50 
51  namespace StringUtils
52  {
53 
54  //
56  //
57  static inline String numberLength(double d, UInt n)
58  {
59  std::stringstream s;
60  //reserve one space for the minus sign
61  Int sign = 0;
62  if (d < 0)
63  sign = 1;
64  d = fabs(d);
65 
66  if (d < pow(10.0, Int(n - sign - 2)))
67  {
68  s.precision(writtenDigits(d));
69  if (sign == 1)
70  s << "-";
71  s << d;
72  }
73  else
74  {
75  UInt exp = 0;
76  while (d > pow(10.0, Int(n - sign - 4)))
77  {
78  d /= 10;
79  ++exp;
80  }
81  d = Int(d) / 10.0;
82  exp += 1;
83  if (sign == 1)
84  s << "-";
85  s << d << "e";
86  if (exp < 10)
87  s << "0";
88  s << exp;
89  }
90  return s.str().substr(0, n);
91  }
92 
93  static inline String& fillLeft(String & this_s, char c, UInt size)
94  {
95  if (this_s.size() < size)
96  {
97  this_s.std::string::operator=(String(size - this_s.size(), c) + this_s);
98  }
99  return this_s;
100  }
101 
102  static inline String& fillRight(String & this_s, char c, UInt size)
103  {
104  if (this_s.size() < size)
105  {
106  this_s.std::string::operator=(this_s + String(size - this_s.size(), c));
107  }
108  return this_s;
109  }
110 
111  static inline bool hasPrefix(const String & this_s, const String & string)
112  {
113  if (string.size() > this_s.size())
114  {
115  return false;
116  }
117  if (string.empty())
118  {
119  return true;
120  }
121  return this_s.compare(0, string.size(), string) == 0;
122  }
123 
124  static inline bool hasSuffix(const String & this_s, const String& string)
125  {
126  if (string.size() > this_s.size())
127  {
128  return false;
129  }
130  if (string.empty())
131  {
132  return true;
133  }
134  return this_s.compare(this_s.size() - string.size(), string.size(), string) == 0;
135  }
136 
137  static inline bool hasSubstring(const String & this_s, const String& string)
138  {
139  return this_s.find(string) != std::string::npos;
140  }
141 
142  static inline bool has(const String & this_s, Byte byte)
143  {
144  return this_s.find(char(byte)) != std::string::npos;
145  }
146 
147  static inline String prefix(const String & this_s, size_t length)
148  {
149  if (length > this_s.size())
150  {
151  throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
152  }
153  return this_s.substr(0, length);
154  }
155 
156  static inline String suffix(const String & this_s, size_t length)
157  {
158  if (length > this_s.size())
159  {
160  throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
161  }
162  return this_s.substr(this_s.size() - length, length);
163  }
164 
165  static inline String prefix(const String & this_s, Int length)
166  {
167  if (length < 0)
168  {
169  throw Exception::IndexUnderflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, 0);
170  }
171  if (length > Int(this_s.size()))
172  {
173  throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
174  }
175  return this_s.substr(0, length);
176  }
177 
178  static inline String suffix(const String & this_s, Int length)
179  {
180  if (length < 0)
181  {
182  throw Exception::IndexUnderflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, 0);
183  }
184  if (length > Int(this_s.size()))
185  {
186  throw Exception::IndexOverflow(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, length, this_s.size());
187  }
188  return this_s.substr(this_s.size() - length, length);
189  }
190 
191  static inline String prefix(const String & this_s, char delim)
192  {
193  Size pos = this_s.find(delim);
194  if (pos == std::string::npos) //char not found
195  {
196  throw Exception::ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
197  String(delim));
198  }
199  return this_s.substr(0, pos);
200  }
201 
202  static inline String suffix(const String & this_s, char delim)
203  {
204  Size pos = this_s.rfind(delim);
205  if (pos == std::string::npos) //char not found
206  {
207  throw Exception::ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
208  String(delim));
209  }
210  return this_s.substr(++pos);
211  }
212 
213  static inline String substr(const String & this_s, size_t pos, size_t n)
214  {
215  Size begin = std::min(pos, this_s.size());
216  return static_cast<String>(this_s.std::string::substr(begin, n));
217  }
218 
219  static inline String chop(const String & this_s, Size n)
220  {
221  Size end = 0;
222  if (n < this_s.size())
223  {
224  end = this_s.size() - n;
225  }
226  return String(this_s.begin(), this_s.begin() + end);
227  }
228 
229  static inline String& trim(String & this_s)
230  {
231  //search for the begin of truncated string
232  std::string::iterator begin = this_s.begin();
233  while (begin != this_s.end() && (*begin == ' ' || *begin == '\t' || *begin == '\n' || *begin == '\r'))
234  {
235  ++begin;
236  }
237 
238  //all characters are whitespaces
239  if (begin == this_s.end())
240  {
241  this_s.clear();
242  return this_s;
243  }
244 
245  //search for the end of truncated string
246  std::string::iterator end = this_s.end();
247  end--;
248  while (end != begin && (*end == ' ' || *end == '\n' || *end == '\t' || *end == '\r'))
249  {
250  --end;
251  }
252  ++end;
253 
254  //no characters are whitespaces
255  if (begin == this_s.begin() && end == this_s.end())
256  {
257  return this_s;
258  }
259 
260  // TODO:
261  // string::operator=(std::string(begin, end));
262  this_s.std::string::operator=(std::string(begin, end));
263 
264  return this_s;
265  }
266 
267  static inline bool isQuoted(const String & this_s, char q)
268  {
269  return (this_s.size() < 2) || (this_s[0] != q) || (this_s[this_s.size() - 1] != q);
270  }
271 
272  static inline String& quote(String & this_s, char q, String::QuotingMethod method)
273  {
274  if (method == String::ESCAPE)
275  {
276  this_s.substitute(String(R"(\)"), String(R"(\\)"));
277  this_s.substitute(String(q), R"(\)" + String(q));
278  }
279  else if (method == String::DOUBLE)
280  this_s.substitute(String(q), String(q) + String(q));
281  this_s.std::string::operator=(q + this_s + q);
282  return this_s;
283  }
284 
285  static inline String& unquote(String & this_s, char q, String::QuotingMethod method)
286  {
287  // check if input string matches output format of the "quote" method:
288  if (isQuoted(this_s, q))
289  {
291  __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
292  "'" + this_s + "' does not have the expected format of a quoted string");
293  }
294  this_s.std::string::operator=(this_s.substr(1, this_s.size() - 2)); // remove quotation marks
295  if (method == String::ESCAPE)
296  {
297  this_s.substitute(R"(\)" + String(q), String(q));
298  this_s.substitute(String(R"(\\)"), String(R"(\)"));
299  }
300  else if (method == String::DOUBLE)
301  this_s.substitute(String(q) + String(q), String(q));
302  return this_s;
303  }
304 
305  static inline String& simplify(String & this_s)
306  {
307  String simple;
308 
309  bool last_was_whitespace = false;
310  for (std::string::iterator it = this_s.begin(); it != this_s.end(); ++it)
311  {
312  if (*it == ' ' || *it == '\n' || *it == '\t' || *it == '\r')
313  {
314  if (!last_was_whitespace)
315  {
316  simple += ' ';
317  }
318  last_was_whitespace = true;
319  }
320  else
321  {
322  simple += *it;
323  last_was_whitespace = false;
324  }
325  }
326 
327  this_s.swap(simple);
328  return this_s;
329  }
330 
331  static inline String random(UInt length)
332  {
333  srand(time(nullptr));
334  String tmp(length, '.');
335  size_t random;
336  for (Size i = 0; i < length; ++i)
337  {
338  random = static_cast<size_t>(floor((static_cast<double>(rand()) / (double(RAND_MAX) + 1)) * 62.0));
339  if (random < 10)
340  {
341  tmp[i] = static_cast<char>(random + 48);
342  }
343  else if (random < 36)
344  {
345  tmp[i] = static_cast<char>(random + 55);
346  }
347  else
348  {
349  tmp[i] = static_cast<char>(random + 61);
350  }
351  }
352  return tmp;
353  }
354 
355  static inline String& reverse(String & this_s)
356  {
357  String tmp = this_s;
358  for (Size i = 0; i != this_s.size(); ++i)
359  {
360  this_s[i] = tmp[this_s.size() - 1 - i];
361  }
362  return this_s;
363  }
364 
365  static inline bool split(const String & this_s, const char splitter, std::vector<String>& substrings,
366  bool quote_protect)
367  {
368  substrings.clear();
369  if (this_s.empty())
370  return false;
371 
372  Size nsplits = count(this_s.begin(), this_s.end(), splitter);
373 
374  if (!quote_protect && (nsplits == 0))
375  {
376  substrings.push_back(this_s);
377  return false;
378  }
379 
380  // splitter(s) found
381  substrings.reserve(nsplits + 1);
382 
383  // why is "this_s." needed here?
384  std::string::const_iterator begin = this_s.begin();
385  std::string::const_iterator end = this_s.begin();
386 
387  if (quote_protect)
388  {
389  Int quote_count(0);
390  for (; end != this_s.end(); ++end)
391  {
392  if (*end == '"')
393  {
394  ++quote_count;
395  }
396  if ((quote_count % 2 == 0) && (*end == splitter))
397  {
398  String block = String(begin, end);
399  block.trim();
400  if ((block.size() >= 2) && ((block.prefix(1) == String("\"")) ^
401  (block.suffix(1) == String("\""))))
402  { // block has start or end quote, but not both
403  // (one quote is somewhere in the middle)
405  __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
406  String("Could not dequote string '") + block +
407  "' due to wrongly placed '\"'.");
408  }
409  else if ((block.size() >= 2) && (block.prefix(1) == String("\"")) &&
410  (block.suffix(1) == String("\"")))
411  { // block has start and end quotes --> remove them
412  block = block.substr(1, block.size() - 2);
413  }
414  substrings.push_back(block);
415  begin = end + 1;
416  }
417  }
418  // no valid splitter found - return empty list
419  if (substrings.empty())
420  {
421  substrings.push_back(this_s);
422  return false;
423  }
424 
425  String block = String(begin, end);
426  block.trim();
427  if ((block.size() >= 2) && ((block.prefix(1) == String("\"")) ^
428  (block.suffix(1) == String("\""))))
429  { // block has start or end quote but not both
430  // (one quote is somewhere in the middle)
432  __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
433  String("Could not dequote string '") + block +
434  "' due to wrongly placed '\"'.");
435  }
436  else if ((block.size() >= 2) && (block.prefix(1) == String("\"")) &&
437  (block.suffix(1) == String("\"")))
438  { // block has start and end quotes --> remove them
439  block = block.substr(1, block.size() - 2);
440  }
441  substrings.push_back(block);
442  }
443  else // do not honor quotes
444  {
445  for (; end != this_s.end(); ++end)
446  {
447  if (*end == splitter)
448  {
449  substrings.push_back(String(begin, end));
450  begin = end + 1;
451  }
452  }
453  substrings.push_back(String(begin, end));
454  }
455 
456  // at this point we are sure that there are at least two components
457  return true;
458  }
459 
460  static inline bool split(const String & this_s, const String& splitter, std::vector<String>& substrings)
461  {
462  substrings.clear();
463  if (this_s.empty())
464  return false;
465 
466  if (splitter.empty()) // split after every character:
467  {
468  substrings.resize(this_s.size());
469  for (Size i = 0; i < this_s.size(); ++i)
470  substrings[i] = this_s[i];
471  return true;
472  }
473 
474  Size len = splitter.size(), start = 0, pos = this_s.find(splitter);
475  if (len == 0)
476  len = 1;
477  while (pos != std::string::npos)
478  {
479  substrings.push_back(this_s.substr(start, pos - start));
480  start = pos + len;
481  pos = this_s.find(splitter, start);
482  }
483  substrings.push_back(this_s.substr(start, this_s.size() - start));
484  return substrings.size() > 1;
485  }
486 
487  static inline bool split_quoted(const String & this_s, const String& splitter, std::vector<String>& substrings,
488  char q, String::QuotingMethod method)
489  {
490  substrings.clear();
491  if (this_s.empty() || splitter.empty())
492  return false;
493 
494  bool in_quote = false;
495  char targets[2] = {q, splitter[0]}; // targets for "find_first_of"
496  std::string rest = splitter.substr(1, splitter.size() - 1);
497  Size start = 0;
498  for (Size i = 0; i < this_s.size(); ++i)
499  {
500  if (in_quote) // skip to closing quotation mark
501  {
502  bool embedded = false;
503  if (method == String::ESCAPE)
504  {
505  for (; i < this_s.size(); ++i)
506  {
507  if (this_s[i] == '\\')
508  embedded = !embedded;
509  else if ((this_s[i] == q) && !embedded)
510  break;
511  else
512  embedded = false;
513  }
514  }
515  else // method: NONE or DOUBLE
516  {
517  for (; i < this_s.size(); ++i)
518  {
519  if (this_s[i] == q)
520  {
521  if (method == String::NONE)
522  break; // found
523  // next character is also closing quotation mark:
524  if ((i < this_s.size() - 1) && (this_s[i + 1] == q))
525  embedded = !embedded;
526  // even number of subsequent quotes (doubled) => found
527  else if (!embedded)
528  break;
529  // odd number of subsequent quotes => belongs to a pair
530  else
531  embedded = false;
532  }
533  }
534  }
535  in_quote = false; // end of quote reached
536  }
537  else
538  {
539  i = this_s.find_first_of(targets, i, 2);
540  if (i == std::string::npos)
541  break; // nothing found
542  if (this_s[i] == q)
543  in_quote = true;
544  else if (this_s.compare(i + 1, rest.size(), rest) == 0) // splitter found
545  {
546  substrings.push_back(this_s.substr(start, i - start));
547  start = i + splitter.size();
548  i = start - 1; // increased by loop
549  }
550  }
551  }
552  if (in_quote) // reached end without finding closing quotation mark
553  {
555  __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
556  "unbalanced quotation marks in string '" + this_s + "'");
557  }
558  substrings.push_back(this_s.substr(start, this_s.size() - start));
559  return substrings.size() > 1;
560  }
561 
562  static inline String& toUpper(String & this_s)
563  {
564  std::transform(this_s.begin(), this_s.end(), this_s.begin(), (int (*)(int))toupper);
565  return this_s;
566  }
567 
568  static inline String& firstToUpper(String & this_s)
569  {
570  if (!this_s.empty())
571  {
572  this_s[0] = toupper(this_s[0]);
573  }
574  return this_s;
575  }
576 
577  static inline String& toLower(String & this_s)
578  {
579  std::transform(this_s.begin(), this_s.end(), this_s.begin(), (int (*)(int))tolower);
580  return this_s;
581  }
582 
583  static inline String& substitute(String & this_s, char from, char to)
584  {
585  std::replace(this_s.begin(), this_s.end(), from, to);
586  return this_s;
587  }
588 
589  static inline String& substitute(String & this_s, const String& from, const String& to)
590  {
591  if (!from.empty())
592  {
593  std::vector<String> parts;
594  this_s.split(from, parts);
595  this_s.concatenate(parts.begin(), parts.end(), to);
596  }
597  return this_s;
598  }
599 
600  static inline String& remove(String & this_s, char what)
601  {
602  this_s.erase(std::remove(this_s.begin(), this_s.end(), what), this_s.end());
603  return this_s;
604  }
605 
606  static inline String& ensureLastChar(String & this_s, char end)
607  {
608  if (!this_s.hasSuffix(end))
609  this_s.append(1, end);
610  return this_s;
611  }
612 
613  static inline String& removeWhitespaces(String& this_s)
614  {
615  std::string::const_iterator it = this_s.begin();
616  std::string::iterator dest = this_s.begin();
617  std::string::const_iterator it_end = this_s.end();
618  bool has_spaces(false);
619  while (it != it_end)
620  {
621  const char c = *it;
622  if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
623  {
624  ++it;
625  has_spaces = true;
626  continue; // no need to copy a whitespace
627  }
628  // copy to the left, if we had a whitespace before
629  if (has_spaces) *dest = *it;
630  // advance both
631  ++dest;
632  ++it;
633  }
634 
635  // shorten result
636  if (has_spaces) this_s.resize(dest - this_s.begin());
637 
638  return this_s;
639  }
640 
641  }
642 
643 } // namespace OPENMS
644 
Invalid conversion exception.
Definition: Exception.h:356
Element could not be found exception.
Definition: Exception.h:676
Int overflow exception.
Definition: Exception.h:247
Int underflow exception.
Definition: Exception.h:209
A more convenient string class.
Definition: String.h:60
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
String prefix(SizeType length) const
returns the prefix of length length
QuotingMethod
How to handle embedded quotes when quoting strings.
Definition: String.h:81
@ ESCAPE
Definition: String.h:81
@ DOUBLE
Definition: String.h:81
@ NONE
Definition: String.h:81
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
String & trim()
removes whitespaces (space, tab, line feed, carriage return) at the beginning and the end of the stri...
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
String suffix(SizeType length) const
returns the suffix of length length
void concatenate(StringIterator first, StringIterator last, const String &glue="")
Concatenates all elements from first to last-1 and inserts glue between the elements.
Definition: String.h:498
OPENMS_BYTE_TYPE Byte
Byte type.
Definition: Types.h:111
int Int
Signed integer type.
Definition: Types.h:102
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
const double c
Definition: Constants.h:214
static bool hasPrefix(const String &this_s, const String &string)
Definition: StringUtilsSimple.h:111
static String & firstToUpper(String &this_s)
Definition: StringUtilsSimple.h:568
static String & remove(String &this_s, char what)
Definition: StringUtilsSimple.h:600
static String chop(const String &this_s, Size n)
Definition: StringUtilsSimple.h:219
static String & reverse(String &this_s)
Definition: StringUtilsSimple.h:355
static bool isQuoted(const String &this_s, char q)
Definition: StringUtilsSimple.h:267
static String numberLength(double d, UInt n)
Functions.
Definition: StringUtilsSimple.h:57
static String & substitute(String &this_s, char from, char to)
Definition: StringUtilsSimple.h:583
static bool hasSuffix(const String &this_s, const String &string)
Definition: StringUtilsSimple.h:124
static String & toLower(String &this_s)
Definition: StringUtilsSimple.h:577
static String & ensureLastChar(String &this_s, char end)
Definition: StringUtilsSimple.h:606
static String random(UInt length)
Definition: StringUtilsSimple.h:331
static String & trim(String &this_s)
Definition: StringUtilsSimple.h:229
static String & simplify(String &this_s)
Definition: StringUtilsSimple.h:305
static String & unquote(String &this_s, char q, String::QuotingMethod method)
Definition: StringUtilsSimple.h:285
static String & removeWhitespaces(String &this_s)
Definition: StringUtilsSimple.h:613
static String & toUpper(String &this_s)
Definition: StringUtilsSimple.h:562
static bool split(const String &this_s, const char splitter, std::vector< String > &substrings, bool quote_protect)
Definition: StringUtilsSimple.h:365
static bool hasSubstring(const String &this_s, const String &string)
Definition: StringUtilsSimple.h:137
static String & fillRight(String &this_s, char c, UInt size)
Definition: StringUtilsSimple.h:102
static bool has(const String &this_s, Byte byte)
Definition: StringUtilsSimple.h:142
static String & quote(String &this_s, char q, String::QuotingMethod method)
Definition: StringUtilsSimple.h:272
static bool split_quoted(const String &this_s, const String &splitter, std::vector< String > &substrings, char q, String::QuotingMethod method)
Definition: StringUtilsSimple.h:487
static String suffix(const String &this_s, size_t length)
Definition: StringUtilsSimple.h:156
static String substr(const String &this_s, size_t pos, size_t n)
Definition: StringUtilsSimple.h:213
static String & fillLeft(String &this_s, char c, UInt size)
Definition: StringUtilsSimple.h:93
static String prefix(const String &this_s, size_t length)
Definition: StringUtilsSimple.h:147
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
constexpr Int writtenDigits(const FloatingPointType &=FloatingPointType())
Number of digits commonly used for writing a floating point type (a.k.a. precision)....
Definition: Types.h:294