VTK-m  2.0
DescriptiveStatistics.h
Go to the documentation of this file.
1 //============================================================================
2 // Copyright (c) Kitware, Inc.
3 // All rights reserved.
4 // See LICENSE.txt for details.
5 //
6 // This software is distributed WITHOUT ANY WARRANTY; without even
7 // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8 // PURPOSE. See the above copyright notice for more information.
9 //============================================================================
10 #ifndef vtk_m_worklet_DescriptiveStatistics_h
11 #define vtk_m_worklet_DescriptiveStatistics_h
12 
13 #include <vtkm/cont/Algorithm.h>
14 #include <vtkm/cont/ArrayCopy.h>
17 
18 namespace vtkm
19 {
20 namespace worklet
21 {
23 {
24 public:
25  template <typename T>
26  struct StatState
27  {
30  : n_(0)
31  , min_(std::numeric_limits<T>::max())
32  , max_(std::numeric_limits<T>::lowest())
33  , sum_(0)
34  , mean_(0)
35  , M2_(0)
36  , M3_(0)
37  , M4_(0)
38  {
39  }
40 
42  StatState(T value)
43  : n_(1)
44  , min_(value)
45  , max_(value)
46  , sum_(value)
47  , mean_(value)
48  , M2_(0)
49  , M3_(0)
50  , M4_(0)
51  {
52  }
53 
56  {
57  const StatState<T>& x = *this;
58  StatState result;
59 
60  result.n_ = x.n_ + y.n_;
61 
62  result.min_ = vtkm::Min(x.min_, y.min_);
63  result.max_ = vtkm::Max(x.max_, y.max_);
64 
65  // TODO: consider implementing compensated sum
66  // https://en.wikipedia.org/wiki/Kahan_summation_algorithm
67  result.sum_ = x.sum_ + y.sum_;
68 
69  // It is tempting to try to deviate from the literature and calculate
70  // mean in each "reduction" from sum and n. This saves one multiplication.
71  // However, RESIST THE TEMPTATION!!! This takes us back to the naive
72  // algorithm (mean = sum of a bunch of numbers / N) that actually
73  // accumulates more error and causes problem when calculating M2
74  // (and thus variance).
75  // TODO: Verify that FieldStatistics exhibits the same problem since
76  // it is using a "parallel" version of the naive algorithm as well.
77  // TODO: or better, just deprecate FieldStatistics.
78  T delta = y.mean_ - x.mean_;
79  result.mean_ = x.mean_ + delta * y.n_ / result.n_;
80 
81  T delta2 = delta * delta;
82  result.M2_ = x.M2_ + y.M2_ + delta2 * x.n_ * y.n_ / result.n_;
83 
84  T delta3 = delta * delta2;
85  T n2 = result.n_ * result.n_;
86  result.M3_ = x.M3_ + y.M3_;
87  result.M3_ += delta3 * x.n_ * y.n_ * (x.n_ - y.n_) / n2;
88  result.M3_ += T(3.0) * delta * (x.n_ * y.M2_ - y.n_ * x.M2_) / result.n_;
89 
90  T delta4 = delta2 * delta2;
91  T n3 = result.n_ * n2;
92  result.M4_ = x.M4_ + y.M4_;
93  result.M4_ += delta4 * x.n_ * y.n_ * (x.n_ * x.n_ - x.n_ * y.n_ + y.n_ * y.n_) / n3;
94  result.M4_ += T(6.0) * delta2 * (x.n_ * x.n_ * y.M2_ + y.n_ * y.n_ * x.M2_) / n2;
95  result.M4_ += T(4.0) * delta * (x.n_ * y.M3_ - y.n_ * x.M3_) / result.n_;
96 
97  return result;
98  }
99 
101  T N() const { return this->n_; }
102 
104  T Min() const { return this->min_; }
105 
107  T Max() const { return this->max_; }
108 
110  T Sum() const { return this->sum_; }
111 
113  T Mean() const { return this->mean_; }
114 
116  T SampleStddev() const { return vtkm::Sqrt(this->SampleVariance()); }
117 
119  T PopulationStddev() const { return vtkm::Sqrt(this->PopulationVariance()); }
120 
122  T SampleVariance() const
123  {
124  VTKM_ASSERT(n_ != 1);
125  return this->M2_ / (this->n_ - 1);
126  }
127 
129  T PopulationVariance() const { return this->M2_ / this->n_; }
130 
132  T Skewness() const
133  {
134  if (this->M2_ == 0)
135  // Shamelessly swiped from Boost Math
136  // The limit is technically undefined, but the interpretation here is clear:
137  // A constant dataset has no skewness.
138  return T(0);
139  else
140  return vtkm::Sqrt(this->n_) * this->M3_ / vtkm::Pow(this->M2_, T{ 1.5 });
141  }
142 
144  T Kurtosis() const
145  {
146  if (this->M2_ == 0)
147  // Shamelessly swiped from Boost Math
148  // The limit is technically undefined, but the interpretation here is clear:
149  // A constant dataset has no kurtosis.
150  return T(0);
151  else
152  return this->n_ * this->M4_ / (this->M2_ * this->M2_);
153  }
154 
155  private:
156  // GCC4.8 is not happy about initializing data members here.
157  T n_;
158  T min_;
159  T max_;
160  T sum_;
161  T mean_;
162  T M2_;
163  T M3_;
164  T M4_;
165  }; // StatState
166 
168  {
169  template <typename T>
171  {
173  }
174  };
175 
185  template <typename FieldType, typename Storage>
188  {
189  using Algorithm = vtkm::cont::Algorithm;
190 
191  // Essentially a TransformReduce. Do we have that convenience in VTKm?
193  return Algorithm::Reduce(states, StatState<FieldType>{});
194  }
195 
196  template <typename KeyType, typename ValueType, typename KeyInStorage, typename ValueInStorage>
201  {
202  using Algorithm = vtkm::cont::Algorithm;
203 
204  // Make a copy of the input arrays so we don't modify them
206  vtkm::cont::ArrayCopy(keys, keys_copy);
207 
209  vtkm::cont::ArrayCopy(values, values_copy);
210 
211  // Gather values of the same key by sorting them according to keys
212  Algorithm::SortByKey(keys_copy, values_copy);
213 
214  auto states = vtkm::cont::make_ArrayHandleTransform(values_copy, MakeStatState{});
216 
218  Algorithm::ReduceByKey(keys_copy, states, keys_out, results, vtkm::Add{});
219 
220  return vtkm::cont::make_ArrayHandleZip(keys_out, results);
221  }
222 }; // DescriptiveStatistics
223 
224 } // worklet
225 } // vtkm
226 #endif // vtk_m_worklet_DescriptiveStatistics_h
vtkm::Sqrt
VTKM_EXEC_CONT vtkm::Float32 Sqrt(vtkm::Float32 x)
Compute the square root of x.
Definition: Math.h:958
vtkm::cont::ArrayHandle
Manages an array-worth of data.
Definition: ArrayHandle.h:283
vtkm::worklet::DescriptiveStatistics::StatState::sum_
T sum_
Definition: DescriptiveStatistics.h:160
vtkm
Groups connected points that have the same field value.
Definition: Atomic.h:19
VTKM_ASSERT
#define VTKM_ASSERT(condition)
Definition: Assert.h:43
VTKM_EXEC_CONT
#define VTKM_EXEC_CONT
Definition: ExportMacros.h:52
vtkm::worklet::DescriptiveStatistics::StatState::M4_
T M4_
Definition: DescriptiveStatistics.h:164
vtkm::worklet::DescriptiveStatistics::StatState::PopulationVariance
VTKM_EXEC_CONT T PopulationVariance() const
Definition: DescriptiveStatistics.h:129
ArrayHandleTransform.h
vtkm::worklet::DescriptiveStatistics::StatState::Max
VTKM_EXEC_CONT T Max() const
Definition: DescriptiveStatistics.h:107
vtkm::worklet::DescriptiveStatistics::Run
static VTKM_CONT StatState< FieldType > Run(const vtkm::cont::ArrayHandle< FieldType, Storage > &field)
Calculate various summary statistics for the input ArrayHandle.
Definition: DescriptiveStatistics.h:186
vtkm::worklet::DescriptiveStatistics::StatState::mean_
T mean_
Definition: DescriptiveStatistics.h:161
vtkm::worklet::DescriptiveStatistics::Run
static VTKM_CONT auto Run(const vtkm::cont::ArrayHandle< KeyType, KeyInStorage > &keys, const vtkm::cont::ArrayHandle< ValueType, ValueInStorage > &values) -> vtkm::cont::ArrayHandleZip< vtkm::cont::ArrayHandle< KeyType >, vtkm::cont::ArrayHandle< StatState< ValueType >>>
Definition: DescriptiveStatistics.h:197
vtkm::worklet::DescriptiveStatistics::StatState::StatState
VTKM_EXEC_CONT StatState()
Definition: DescriptiveStatistics.h:29
ArrayCopy.h
vtkm::worklet::DescriptiveStatistics::StatState::SampleVariance
VTKM_EXEC_CONT T SampleVariance() const
Definition: DescriptiveStatistics.h:122
vtkm::worklet::DescriptiveStatistics::StatState::Skewness
VTKM_EXEC_CONT T Skewness() const
Definition: DescriptiveStatistics.h:132
ArrayHandleZip.h
vtkm::Add
Definition: Types.h:222
vtkm::worklet::DescriptiveStatistics::MakeStatState
Definition: DescriptiveStatistics.h:167
vtkm::cont::ArrayHandleZip
ArrayHandleZip is a specialization of ArrayHandle.
Definition: ArrayHandleZip.h:251
vtkm::worklet::DescriptiveStatistics::StatState::Kurtosis
VTKM_EXEC_CONT T Kurtosis() const
Definition: DescriptiveStatistics.h:144
vtkm::cont::make_ArrayHandleTransform
VTKM_CONT vtkm::cont::ArrayHandleTransform< HandleType, FunctorType > make_ArrayHandleTransform(HandleType handle, FunctorType functor)
make_ArrayHandleTransform is convenience function to generate an ArrayHandleTransform.
Definition: ArrayHandleTransform.h:474
Algorithm.h
vtkm::worklet::DescriptiveStatistics::StatState::operator+
VTKM_EXEC_CONT StatState operator+(const StatState< T > &y) const
Definition: DescriptiveStatistics.h:55
vtkm::worklet::DescriptiveStatistics::StatState::Sum
VTKM_EXEC_CONT T Sum() const
Definition: DescriptiveStatistics.h:110
vtkm::cont::make_ArrayHandleZip
VTKM_CONT vtkm::cont::ArrayHandleZip< FirstHandleType, SecondHandleType > make_ArrayHandleZip(const FirstHandleType &first, const SecondHandleType &second)
A convenience function for creating an ArrayHandleZip.
Definition: ArrayHandleZip.h:288
vtkm::cont::ArrayCopy
void ArrayCopy(const SourceArrayType &source, DestArrayType &destination)
Does a deep copy from one array to another array.
Definition: ArrayCopy.h:142
vtkm::worklet::DescriptiveStatistics::StatState
Definition: DescriptiveStatistics.h:26
VTKM_CONT
#define VTKM_CONT
Definition: ExportMacros.h:57
vtkm::worklet::DescriptiveStatistics::MakeStatState::operator()
VTKM_EXEC_CONT vtkm::worklet::DescriptiveStatistics::StatState< T > operator()(T value) const
Definition: DescriptiveStatistics.h:170
vtkm::worklet::DescriptiveStatistics::StatState::max_
T max_
Definition: DescriptiveStatistics.h:159
vtkm::worklet::DescriptiveStatistics::StatState::M3_
T M3_
Definition: DescriptiveStatistics.h:163
vtkm::worklet::DescriptiveStatistics::StatState::Mean
VTKM_EXEC_CONT T Mean() const
Definition: DescriptiveStatistics.h:113
vtkm::worklet::DescriptiveStatistics::StatState::SampleStddev
VTKM_EXEC_CONT T SampleStddev() const
Definition: DescriptiveStatistics.h:116
vtkm::cont::Algorithm
Definition: Algorithm.h:385
vtkm::worklet::DescriptiveStatistics::StatState::n_
T n_
Definition: DescriptiveStatistics.h:157
vtkm::worklet::DescriptiveStatistics::StatState::N
VTKM_EXEC_CONT T N() const
Definition: DescriptiveStatistics.h:101
vtkm::worklet::DescriptiveStatistics
Definition: DescriptiveStatistics.h:22
vtkm::worklet::DescriptiveStatistics::StatState::min_
T min_
Definition: DescriptiveStatistics.h:158
vtkm::worklet::DescriptiveStatistics::StatState::M2_
T M2_
Definition: DescriptiveStatistics.h:162
vtkm::worklet::DescriptiveStatistics::StatState::Min
VTKM_EXEC_CONT T Min() const
Definition: DescriptiveStatistics.h:104
vtkm::worklet::DescriptiveStatistics::StatState::StatState
VTKM_EXEC_CONT StatState(T value)
Definition: DescriptiveStatistics.h:42
vtkm::worklet::DescriptiveStatistics::StatState::PopulationStddev
VTKM_EXEC_CONT T PopulationStddev() const
Definition: DescriptiveStatistics.h:119