OpenMS
ExperimentalDesign.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Timo Sachsenberg $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 
41 #include <vector>
42 #include <map>
43 #include <set>
44 
45 namespace OpenMS
46 {
47  class ConsensusMap;
48  class FeatureMap;
49 
244  class OPENMS_DLLAPI ExperimentalDesign
245  {
246 
247  public:
// One row of the MS file section (MSFileSection is a std::vector of these entries).
// Every member carries an in-class default initializer, so a default-constructed
// entry is fully initialized.
254  class OPENMS_DLLAPI MSFileSectionEntry
255  {
256  public:
257  MSFileSectionEntry() = default;
// fraction group of this file (fraction groups group fraction ids and source files); defaults to 1
258  unsigned fraction_group = 1;
// fraction index of this file; defaults to 1
259  unsigned fraction = 1;
// path of the MS file; defaults to the placeholder "UNKNOWN_FILE"
// NOTE(review): declared as std::string, whereas sample_name below uses String
260  std::string path = "UNKNOWN_FILE";
// label within the file; defaults to 1
261  unsigned label = 1;
// sample index; defaults to 0 (presumably the row in the sample section -- TODO confirm)
262  unsigned sample = 0;
// name/ID of the sample; defaults to "0"
263  String sample_name = "0";
264  };
265 
// Sample section of an experimental design: a table of String cells (content_)
// plus two lookup maps, one from sample name to row index and one from column
// (factor) name to column index.
266  class OPENMS_DLLAPI SampleSection
267  {
268  public:
269 
270  SampleSection() = default;
271 
// NOTE(review): the opening line of this constructor declaration (line 272) is
// missing from this listing. According to the symbol index of this listing it is
// SampleSection(content, sample_to_rowindex, columnname_to_columnindex); the three
// parameters correspond to the three private members declared below.
273  const std::vector< std::vector < String > >& content,
274  const std::map< String, Size >& sample_to_rowindex,
275  const std::map< String, Size >& columnname_to_columnindex
276  );
277 
278  // Get the set of all samples that are present in the sample section
279  std::set< String > getSamples() const;
280 
281  // Add a sample as the last row; content holds the row's cells and defaults to an empty row
282  void addSample(const String& sample, const std::vector<String>& content = {});
283 
284  // TODO: should it include the Sample ID column or not?
285  // Get the set of all factors (column names) that were defined for the sample section
286  std::set< String > getFactors() const;
287 
288  // Checks whether the sample section has a row for the given sample name/ID
289  bool hasSample(const String& sample) const;
290 
291  // Checks whether the sample section has a specific factor (i.e. column name)
292  bool hasFactor(const String &factor) const;
293 
294  // Returns the value of the factor for the given sample name and factor name
295  String getFactorValue(const String& sample_name, const String &factor) const;
296 
297  // Returns the value of the factor for the given sample index and factor name
298  String getFactorValue(unsigned sample_idx, const String &factor) const;
299 
300  // Returns the column index of the factor
301  Size getFactorColIdx(const String &factor) const;
302 
303  // Returns the name/ID of the sample in the given row. Not necessarily the row index
304  String getSampleName(unsigned sample_row) const;
305 
306  // Returns the row index in the sample section for a sample name/ID
307  unsigned getSampleRow(const String& sample) const;
308 
// NOTE(review): lines 309-310 are missing from this listing. The symbol index of
// this listing names `Size getContentSize() const` ("returns the number of entries
// in content_ member"); presumably it is declared here -- TODO confirm.
311 
312  private:
313 
314  // The entries of the Sample Section, filled while parsing
315  // the Experimental Design File
316  std::vector< std::vector < String > > content_;
317 
318  // Maps the Sample Entry name to the row where the sample
319  // appears in the Sample section, its sample index
320  std::map< String, Size > sample_to_rowindex_;
321 
322  // Maps the column name of the SampleSection to the
323  // Index of the column
324  std::map< String, Size > columnname_to_columnindex_;
325  };
326 
// The MS file section is a plain vector of MSFileSectionEntry rows.
327  using MSFileSection = std::vector<MSFileSectionEntry>;
328 
329  // Experimental Design c'tors
// Default construction (compiler-generated).
330  ExperimentalDesign() = default;
331 
// Construct from an MS file section and a sample section.
332  ExperimentalDesign(const MSFileSection& msfile_section, const SampleSection& sample_section);
333 
// NOTE(review): line 334 is missing from this listing; the symbol index of this
// listing names `const MSFileSection & getMSFileSection() const`, presumably declared here.
335 
// Replaces the MS file section.
336  void setMSFileSection(const MSFileSection& msfile_section);
337 
338  // Returns the Sample Section of the experimental design file
// NOTE(review): the declaration this comment belongs to (line 339) is missing from
// this listing; the symbol index names
// `const ExperimentalDesign::SampleSection & getSampleSection() const`.
340 
// Replaces the sample section.
341  void setSampleSection(const SampleSection& sample_section);
342 
// NOTE(review): the brief for this method is missing from this listing. Judging by
// name and return type it maps each distinct sample-section row (vector of cell
// values) to the set of samples sharing that row -- TODO confirm.
345  std::map<std::vector<String>, std::set<String>> getUniqueSampleRowToSampleMapping() const;
346 
// NOTE(review): brief missing; presumably maps sample name to a prefractionation
// index -- TODO confirm.
349  std::map<String, unsigned> getSampleToPrefractionationMapping() const;
350 
// return fraction index to file paths (ordered by fraction_group)
352  //TODO this probably needs a basename parameter to be fully compatible with the other mappings!! Implicit full path.
353  std::map<unsigned int, std::vector<String> > getFractionToMSFilesMapping() const;
354 
// NOTE(review): brief missing; judging by name and return type, presumably one inner
// vector of (path, label) pairs per condition -- TODO confirm.
357  //TODO this probably needs a basename parameter to be fully compatible with the other mappings!! Implicit full path.
358  std::vector<std::vector<std::pair<String, unsigned>>> getConditionToPathLabelVector() const;
359 
// return a condition (unique combination of sample section values except replicate)
// to sample index mapping
361  std::map<std::vector<String>, std::set<unsigned>> getConditionToSampleMapping() const;
362 
363  /*
364  * The (Path, Label) tuples in the experimental design have to be unique, so we can map them
365  * uniquely to the sample number, fraction number, and fraction_group number
366  */
367 
// For all getPathLabelTo*Mapping methods below the key is a (path, label) pair.
// NOTE(review): use_basename_only presumably keys the map by file basename instead of
// the full path (compare the private getFileNames_(bool basename)) -- TODO confirm.

// NOTE(review): brief missing; presumably <file_path, label> to prefractionation mapping.
370  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToPrefractionationMapping(bool use_basename_only) const;
371 
// NOTE(review): brief missing; presumably <file_path, label> to condition mapping.
374  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToConditionMapping(bool use_basename_only) const;
375 
// NOTE(review): brief missing; presumably sample name to condition mapping.
378  std::map<String, unsigned> getSampleToConditionMapping() const;
379 
// return <file_path, label> to sample index mapping
381  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToSampleMapping(bool use_basename_only) const;
382 
// return <file_path, label> to fraction mapping
384  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToFractionMapping(bool use_basename_only) const;
385 
// return <file_path, label> to fraction_group mapping
387  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToFractionGroupMapping(bool use_basename_only) const;
388 
389  // @return the number of samples measured (= highest sample index)
390  unsigned getNumberOfSamples() const;
391 
392  // @return the number of fractions (= highest fraction index)
393  unsigned getNumberOfFractions() const;
394 
395  // @return the number of labels per file
396  unsigned getNumberOfLabels() const;
397 
398  // @return the number of MS files (= fractions * fraction groups)
399  unsigned getNumberOfMSFiles() const;
400 
401  // @return the number of fraction_groups
402  // Fraction groups allow grouping of fraction ids and source files
403  unsigned getNumberOfFractionGroups() const;
404 
405  // @return sample index (depends on fraction_group and label)
// label defaults to 1. Note: unlike the other getters in this class, this method is
// not declared const.
406  unsigned getSample(unsigned fraction_group, unsigned label = 1);
407 
// NOTE(review): the first line of this comment (line 408) is missing from this
// listing; the remaining sentence presumably states when isFractionated() returns true.
409  // This is the case if we have at least one fraction group with >= 2 fractions
410  bool isFractionated() const;
411 
// NOTE(review): the description of this method is missing from this listing. It takes a
// set of basenames and returns a Size; presumably it filters the design by MS file
// basename and reports a count -- TODO confirm against the implementation.
415  Size filterByBasenames(const std::set<String>& bns);
416 
// NOTE(review): several declaration lines are missing from the listing in this area.
// The symbol index of this listing additionally names
// `static ExperimentalDesign fromConsensusMap(const ConsensusMap &c)` (extract
// experimental design from consensus map) and
// `static ExperimentalDesign fromFeatureMap(const FeatureMap &f)` (extract experimental
// design from feature map).
419 
422 
425 
// Extract experimental design from identifications.
427  static ExperimentalDesign fromIdentifications(const std::vector<ProteinIdentification>& proteins);
428  //TODO create another overload here, that takes two enums outerVec and innerVec with entries Replicate, Fraction, Sample
429 
430  private:
431  // MS filename column, optionally trims to basename
432  std::vector< String > getFileNames_(bool basename) const;
433 
434  // returns label column
435  std::vector<unsigned> getLabels_() const;
436 
437  // returns fraction column
438  std::vector<unsigned> getFractions_() const;
439 
// Generic Mapper (Path, Label) -> f(row)
// f is a plain function pointer that takes one MSFileSectionEntry and returns an unsigned.
// NOTE(review): the unnamed bool is presumably the use_basename_only flag of the public
// getPathLabelTo*Mapping methods -- TODO confirm.
441  std::map< std::pair< String, unsigned >, unsigned> pathLabelMapper_(
442  bool,
443  unsigned (*f)(const ExperimentalDesign::MSFileSectionEntry&)) const;
444 
445  // sort to obtain the default order
446  void sort_();
447 
// NOTE(review): no description in this listing; judging by the name, presumably reports
// an error using `message` when `item` is already contained in `container` -- TODO confirm.
// Both container and item are taken by non-const reference.
448  template<typename T>
449  static void errorIfAlreadyExists(std::set<T> &container, T &item, const String &message);
450 
451  // basic consistency checks
452  void isValid_();
// NOTE(review): the data members are missing from this listing; the symbol index places
// `MSFileSection msfile_section_` at line 454 and `SampleSection sample_section_` at line 455.
453 
456  };
457 }
458 
A container for consensus elements.
Definition: ConsensusMap.h:92
Definition: ExperimentalDesign.h:255
Definition: ExperimentalDesign.h:267
SampleSection(const std::vector< std::vector< String > > &content, const std::map< String, Size > &sample_to_rowindex, const std::map< String, Size > &columnname_to_columnindex)
bool hasSample(const String &sample) const
std::vector< std::vector< String > > content_
Definition: ExperimentalDesign.h:316
std::map< String, Size > columnname_to_columnindex_
Definition: ExperimentalDesign.h:324
void addSample(const String &sample, const std::vector< String > &content={})
String getSampleName(unsigned sample_row) const
std::map< String, Size > sample_to_rowindex_
Definition: ExperimentalDesign.h:320
Size getContentSize() const
returns the number of entries in content_ member
std::set< String > getFactors() const
Size getFactorColIdx(const String &factor) const
std::set< String > getSamples() const
bool hasFactor(const String &factor) const
String getFactorValue(const String &sample_name, const String &factor) const
unsigned getSampleRow(const String &sample) const
String getFactorValue(unsigned sample_idx, const String &factor) const
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesignFile class.
Definition: ExperimentalDesign.h:245
unsigned getNumberOfLabels() const
static void errorIfAlreadyExists(std::set< T > &container, T &item, const String &message)
unsigned getNumberOfFractions() const
static ExperimentalDesign fromConsensusMap(const ConsensusMap &c)
Extract experimental design from consensus map.
unsigned getSample(unsigned fraction_group, unsigned label=1)
std::map< std::vector< String >, std::set< String > > getUniqueSampleRowToSampleMapping() const
unsigned getNumberOfSamples() const
void setSampleSection(const SampleSection &sample_section)
std::vector< unsigned > getLabels_() const
bool sameNrOfMSFilesPerFraction() const
Size filterByBasenames(const std::set< String > &bns)
unsigned getNumberOfFractionGroups() const
std::map< unsigned int, std::vector< String > > getFractionToMSFilesMapping() const
return fraction index to file paths (ordered by fraction_group)
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToFractionMapping(bool use_basename_only) const
return <file_path, label> to fraction mapping
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToPrefractionationMapping(bool use_basename_only) const
std::map< std::vector< String >, std::set< unsigned > > getConditionToSampleMapping() const
return a condition (unique combination of sample section values except replicate) to Sample index mapping
void setMSFileSection(const MSFileSection &msfile_section)
std::vector< MSFileSectionEntry > MSFileSection
Definition: ExperimentalDesign.h:327
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToConditionMapping(bool use_basename_only) const
MSFileSection msfile_section_
Definition: ExperimentalDesign.h:454
static ExperimentalDesign fromIdentifications(const std::vector< ProteinIdentification > &proteins)
Extract experimental design from identifications.
std::map< std::pair< String, unsigned >, unsigned > pathLabelMapper_(bool, unsigned(*f)(const ExperimentalDesign::MSFileSectionEntry &)) const
Generic Mapper (Path, Label) -> f(row)
ExperimentalDesign(const MSFileSection &msfile_section, const SampleSection &sample_section)
std::map< String, unsigned > getSampleToPrefractionationMapping() const
std::vector< unsigned > getFractions_() const
const MSFileSection & getMSFileSection() const
std::vector< String > getFileNames_(bool basename) const
const ExperimentalDesign::SampleSection & getSampleSection() const
static ExperimentalDesign fromFeatureMap(const FeatureMap &f)
Extract experimental design from feature map.
std::vector< std::vector< std::pair< String, unsigned > > > getConditionToPathLabelVector() const
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToSampleMapping(bool use_basename_only) const
return <file_path, label> to sample index mapping
SampleSection sample_section_
Definition: ExperimentalDesign.h:455
std::map< String, unsigned > getSampleToConditionMapping() const
unsigned getNumberOfMSFiles() const
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToFractionGroupMapping(bool use_basename_only) const
return <file_path, label> to fraction_group mapping
A container for features.
Definition: FeatureMap.h:106
A more convenient string class.
Definition: String.h:60
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
const double c
Definition: Constants.h:214
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48