casacore
FitToHalfStatistics.h
Go to the documentation of this file.
1 //# Copyright (C) 2000,2001
2 //# Associated Universities, Inc. Washington DC, USA.
3 //#
4 //# This library is free software; you can redistribute it and/or modify it
5 //# under the terms of the GNU Library General Public License as published by
6 //# the Free Software Foundation; either version 2 of the License, or (at your
7 //# option) any later version.
8 //#
9 //# This library is distributed in the hope that it will be useful, but WITHOUT
10 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12 //# License for more details.
13 //#
14 //# You should have received a copy of the GNU Library General Public License
15 //# along with this library; if not, write to the Free Software Foundation,
16 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17 //#
18 //# Correspondence concerning AIPS++ should be addressed as follows:
19 //# Internet email: aips2-request@nrao.edu.
20 //# Postal address: AIPS++ Project Office
21 //# National Radio Astronomy Observatory
22 //# 520 Edgemont Road
23 //# Charlottesville, VA 22903-2475 USA
24 //#
25 
26 #ifndef SCIMATH_FITTOHALFSTATISTICS_H
27 #define SCIMATH_FITTOHALFSTATISTICS_H
28 
29 #include <casacore/casa/aips.h>
30 
31 #include <casacore/scimath/StatsFramework/ConstrainedRangeStatistics.h>
32 #include <casacore/scimath/StatsFramework/FitToHalfStatisticsData.h>
33 
34 namespace casacore {
35 
36 // Class to calculate statistics using the so-called fit to half algorithm. In
37 // this algorithm, a center value is specified, and only points greater or equal
38 // or less or equal this value are included. Furthermore, each of the included
39 // points is reflected about the center value, and these virtual points are
40 // added to the included points and the union of sets of included real points
41 // and virtual points are used for computing statistics. The specified center
42 // point is therefore the mean and median of the resulting distribution, and the
43 // total number of points is exactly twice the number of real data points that
44 // are included.
45 //
46 // This class uses a ConstrainedRangeQuantileComputer object for computing
47 // quantile-like statistics. See class documentation for StatisticsAlgorithm for
48 // details regarding QuantileComputer classes.
49 
50 template <
51  class AccumType, class DataIterator, class MaskIterator=const Bool *,
52  class WeightsIterator=DataIterator
53 >
55  : public ConstrainedRangeStatistics<CASA_STATP> {
56 public:
57 
58  const static AccumType TWO;
59 
60  // <src>value</src> is only used if <src>center</src>=CVALUE
65  );
66 
67  // copy semantics
69 
71 
72  // copy semantics
75  );
76 
77  // Clone this instance. Caller is responsible for deleting the returned
78  // pointer.
80 
81  // get the algorithm that this object uses for computing stats
84  };
85 
86  // The median is just the center value, so none of the parameters to this
87  // method are used.
88  AccumType getMedian(
89  CountedPtr<uInt64> knownNpts=nullptr,
90  CountedPtr<AccumType> knownMin=nullptr,
91  CountedPtr<AccumType> knownMax=nullptr,
92  uInt binningThreshholdSizeBytes=4096*4096,
93  Bool persistSortedArray=False, uInt nBins=10000
94  );
95 
96  // <group>
97  // In the following group of methods, if the size of the composite dataset
98  // is smaller than <src>binningThreshholdSizeBytes</src>, the composite
99  // dataset will be (perhaps partially) sorted and persisted in memory during
100  // the call. In that case, and if <src>persistSortedArray</src> is True,
101  // this sorted array will remain in memory after the call and will be used
102  // on subsequent calls of this method when
103  // <src>binningThreshholdSizeBytes</src> is greater than the size of the
104  // composite dataset. If <src>persistSortedArray</src> is False, the sorted
105  // array will not be stored after this call completes and so any subsequent
106  // calls for which the dataset size is less than
107  // <src>binningThreshholdSizeBytes</src>, the dataset will be sorted from
108  // scratch. Values which are not included due to non-unity strides, are not
109  // included in any specified ranges, are masked, or have associated weights
110  // of zero are not considered as dataset members for quantile computations.
111  // If one has a priori information regarding the number of points (npts)
112  // and/or the minimum and maximum values of the data set, these can be
113  // supplied to improve performance. Note however, that if these values are
114  // not correct, the resulting median and/or quantile values will also not be
115  // correct (although see the following notes regarding max/min). Note that
116  // if this object has already had getStatistics() called, and the min and
117  // max were calculated, there is no need to pass these values in as they
118  // have been stored internally and will be used (although passing them
119  // explicitly shouldn't hurt anything). If provided, npts, the number of
120  // points falling in the specified ranges which are not masked and have
121  // weights > 0, should be correct. <src>min</src> can be less than the true
122  // minimum, and <src>max</src> can be greater than the True maximum, but for
123  // best performance, these should be as close to the actual min and max as
124  // possible (and ideally the actual min/max values of the data set).
126  std::map<Double, AccumType>& quantiles,
127  const std::set<Double>& fractions,
128  CountedPtr<uInt64> knownNpts=nullptr,
129  CountedPtr<AccumType> knownMin=nullptr,
130  CountedPtr<AccumType> knownMax=nullptr,
131  uInt binningThreshholdSizeBytes=4096*4096,
132  Bool persistSortedArray=False, uInt nBins=10000
133  );
134 
135  // get the median of the absolute deviation about the median of the data.
137  CountedPtr<uInt64> knownNpts=nullptr,
138  CountedPtr<AccumType> knownMin=nullptr,
139  CountedPtr<AccumType> knownMax=nullptr,
140  uInt binningThreshholdSizeBytes=4096*4096,
141  Bool persistSortedArray=False, uInt nBins=10000
142  );
143 
144  // Get the specified quantiles. <src>fractions</src> must be between 0 and
145  // 1, noninclusive.
146  std::map<Double, AccumType> getQuantiles(
147  const std::set<Double>& fractions,
148  CountedPtr<uInt64> knownNpts=nullptr,
149  CountedPtr<AccumType> knownMin=nullptr,
150  CountedPtr<AccumType> knownMax=nullptr,
151  uInt binningThreshholdSizeBytes=4096*4096,
152  Bool persistSortedArray=False, uInt nBins=10000
153  );
154  // </group>
155 
156  // scan the dataset(s) that have been added, and find the min and max.
157  // This method may be called even if setStatsToCaclulate has been called and
158  // MAX and MIN has been excluded.
159  virtual void getMinMax(AccumType& mymin, AccumType& mymax);
160 
161  // scan the dataset(s) that have been added, and find the number of good
162  // points. This method may be called even if setStatsToCaclulate has been
163  // called and NPTS has been excluded. If setCalculateAsAdded(True) has
164  // previously been called after this object has been (re)initialized, an
165  // exception will be thrown.
167 
168  // reset object to initial state. Clears all private fields including data,
169  // accumulators, global range. It does not affect the center type, center
170  // value, or which "side" to use which were set at construction.
171  virtual void reset();
172 
173  // This class does not allow statistics to be calculated as datasets are
174  // added, so an exception will be thrown if <src>c</src> is True.
176 
177  // Override base class method by requiring mean to be computed in addition
178  // to what is added in stats if the requested center value is CMEAN.
179  virtual void setStatsToCalculate(std::set<StatisticsData::STATS>& stats);
180 
181 protected:
182 
184 
186 
188 
189  inline const StatsData<AccumType>& _getStatsData() const {
190  return _statsData;
191  }
192 
193  // <group>
194  // no weights, no mask, no ranges
196  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
197  const DataIterator& dataBegin, uInt64 nr, uInt dataStride
198  );
199 
200  // no weights, no mask
202  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
203  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
204  const DataRanges& ranges, Bool isInclude
205  );
206 
208  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
209  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
210  const MaskIterator& maskBegin, uInt maskStride
211  );
212 
214  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
215  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
216  const MaskIterator& maskBegin, uInt maskStride,
217  const DataRanges& ranges, Bool isInclude
218  );
219  // </group>
220 
222 
223  // <group>
224  // has weights, but no mask, no ranges
226  StatsData<AccumType>& stats, LocationType& location,
227  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
228  uInt64 nr, uInt dataStride
229  );
230 
232  StatsData<AccumType>& stats, LocationType& location,
233  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
234  uInt64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
235  );
236 
238  StatsData<AccumType>& stats, LocationType& location,
239  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
240  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
241  uInt maskStride
242  );
243 
245  StatsData<AccumType>& stats, LocationType& location,
246  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
247  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
248  uInt maskStride, const DataRanges& ranges, Bool isInclude
249  );
250  // </group>
251 
252 private:
255  AccumType _centerValue;
258  // these are the max and min for the real portion of the dataset
261  // This is used for convenience and performance. It should always
262  // be the same as the _range value used in the base
263  // ConstrainedRangeStatistics object
265 
266  // get the min max of the entire (real + virtual) data set. Only
267  // used for quantile computation
269  CountedPtr<AccumType>& realMin, CountedPtr<AccumType>& realMax,
271  );
272 
273  // get the min/max of the real portion only of the dataset
274  void _getRealMinMax(AccumType& realMin, AccumType& realMax);
275 
276  void _setRange();
277 
278 };
279 
280 }
281 
282 #ifndef CASACORE_NO_AUTO_TEMPLATES
283 #include <casacore/scimath/StatsFramework/FitToHalfStatistics.tcc>
284 #endif
285 
286 #endif
casacore::FitToHalfStatistics::FitToHalfStatistics
FitToHalfStatistics(FitToHalfStatisticsData::CENTER center=FitToHalfStatisticsData::CMEAN, FitToHalfStatisticsData::USE_DATA useData=FitToHalfStatisticsData::LE_CENTER, AccumType value=0)
value is only used if center=CVALUE
casacore::FitToHalfStatistics::reset
virtual void reset()
reset object to initial state.
casacore::FitToHalfStatistics::_getMinMax
void _getMinMax(CountedPtr< AccumType > &realMin, CountedPtr< AccumType > &realMax, CountedPtr< AccumType > knownMin, CountedPtr< AccumType > knownMax)
get the min max of the entire (real + virtual) data set.
DataRanges
#define DataRanges
Definition: StatisticsTypes.h:51
casacore::FitToHalfStatistics::_weightedStats
void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride)
has weights, but no mask, no ranges
casacore::FitToHalfStatistics::setStatsToCalculate
virtual void setStatsToCalculate(std::set< StatisticsData::STATS > &stats)
Override base class method by requiring mean to be computed in addition to what is added in stats if ...
casacore::FitToHalfStatistics::_getInitialStats
virtual StatsData< AccumType > _getInitialStats() const
casacore::FitToHalfStatistics::getMedianAndQuantiles
AccumType getMedianAndQuantiles(std::map< Double, AccumType > &quantiles, const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
In the following group of methods, if the size of the composite dataset is smaller than binningThresh...
casacore::ConstrainedRangeStatistics
Abstract base class for statistics algorithms which are characterized by a range of good values.
Definition: ConstrainedRangeStatistics.h:49
casacore::StatisticsData::ALGORITHM
ALGORITHM
implemented algorithms
Definition: StatisticsData.h:47
casacore::CountedPtr
Referenced counted pointer for constant data.
Definition: CountedPtr.h:81
casacore::FitToHalfStatistics::_useLower
Bool _useLower
Definition: FitToHalfStatistics.h:254
casacore::FitToHalfStatistics::_getStatsData
StatsData< AccumType > & _getStatsData()
Retrieve stats structure.
Definition: FitToHalfStatistics.h:187
casacore::FitToHalfStatistics::_rangeIsSet
Bool _rangeIsSet
Definition: FitToHalfStatistics.h:257
casacore::FitToHalfStatistics::operator=
FitToHalfStatistics< CASA_STATP > & operator=(const FitToHalfStatistics< CASA_STATP > &other)
copy semantics
casacore::FitToHalfStatistics::_weightedStats
void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude)
casacore::FitToHalfStatistics::_weightedStats
void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride)
casacore::FitToHalfStatistics::setCalculateAsAdded
void setCalculateAsAdded(Bool c)
This class does not allow statistics to be calculated as datasets are added, so an exception will be ...
casacore::FitToHalfStatistics::getMedianAbsDevMed
AccumType getMedianAbsDevMed(CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
get the median of the absolute deviation about the median of the data.
casacore::FitToHalfStatistics::_range
CountedPtr< std::pair< AccumType, AccumType > > _range
This is used for convenience and performance.
Definition: FitToHalfStatistics.h:264
casacore::FitToHalfStatisticsData::USE_DATA
USE_DATA
which section of data to use, greater than or less than the center value
Definition: FitToHalfStatisticsData.h:48
casacore::FitToHalfStatistics::_realMin
CountedPtr< AccumType > _realMin
Definition: FitToHalfStatistics.h:259
casacore::FitToHalfStatistics::_statsData
StatsData< AccumType > _statsData
Definition: FitToHalfStatistics.h:256
casacore::FitToHalfStatistics::_centerType
FitToHalfStatisticsData::CENTER _centerType
Definition: FitToHalfStatistics.h:253
casacore::StatisticsAlgorithm< CASA_STATP >
casacore::FitToHalfStatistics::getQuantiles
std::map< Double, AccumType > getQuantiles(const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
Get the specified quantiles.
casacore::FitToHalfStatistics::TWO
static const AccumType TWO
Definition: FitToHalfStatistics.h:58
casacore::StatisticsData::FITTOHALF
@ FITTOHALF
Definition: StatisticsData.h:51
casacore::StatsData
Definition: StatisticsTypes.h:57
casacore::value
LatticeExprNode value(const LatticeExprNode &expr)
This function returns the value of the expression without a mask.
casacore::FitToHalfStatistics::_doMedAbsDevMed
Bool _doMedAbsDevMed
Definition: FitToHalfStatistics.h:257
casacore::False
const Bool False
Definition: aipstype.h:44
casacore::uInt
unsigned int uInt
Definition: aipstype.h:51
casacore::FitToHalfStatistics::_weightedStats
void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude)
casacore::FitToHalfStatistics
Class to calculate statistics using the so-called fit to half algorithm.
Definition: FitToHalfStatistics.h:55
casacore::FitToHalfStatistics::_getStatistics
virtual StatsData< AccumType > _getStatistics()
casacore::FitToHalfStatistics::getMedian
AccumType getMedian(CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
The median is just the center value, so none of the parameters to this method are used.
casacore::FitToHalfStatistics::_setRange
void _setRange()
derived classes need to implement how to set their respective range
casacore::FitToHalfStatistics::FitToHalfStatistics
FitToHalfStatistics(const FitToHalfStatistics< CASA_STATP > &other)
copy semantics
casacore::FitToHalfStatisticsData::CENTER
CENTER
choice of center point based on the corresponding statistics from the entire distribution of data,...
Definition: FitToHalfStatisticsData.h:41
casacore::FitToHalfStatistics::_realMax
CountedPtr< AccumType > _realMax
these are the max and min for the real portion of the dataset
Definition: FitToHalfStatistics.h:259
casacore
this file contains all the compiler specific defines
Definition: mainpage.dox:28
casacore::FitToHalfStatistics::_getStatsData
const StatsData< AccumType > & _getStatsData() const
Definition: FitToHalfStatistics.h:189
casacore::C::c
const Double c
Fundamental physical constants (SI units):
casacore::FitToHalfStatistics::_isNullSet
Bool _isNullSet
Definition: FitToHalfStatistics.h:260
casacore::FitToHalfStatistics::_getRealMinMax
void _getRealMinMax(AccumType &realMin, AccumType &realMax)
get the min/max of the real portion only of the dataset
casacore::FitToHalfStatistics::_unweightedStats
void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride)
no weights, no mask, no ranges
casacore::FitToHalfStatisticsData::LE_CENTER
@ LE_CENTER
Definition: FitToHalfStatisticsData.h:49
casacore::FitToHalfStatistics::_unweightedStats
void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride)
casacore::FitToHalfStatistics::_centerValue
AccumType _centerValue
Definition: FitToHalfStatistics.h:255
casacore::FitToHalfStatistics::_updateDataProviderMaxMin
void _updateDataProviderMaxMin(const StatsData< AccumType > &threadStats)
casacore::FitToHalfStatistics::~FitToHalfStatistics
virtual ~FitToHalfStatistics()
casacore::Bool
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
casacore::FitToHalfStatistics::algorithm
virtual StatisticsData::ALGORITHM algorithm() const
get the algorithm that this object uses for computing stats
Definition: FitToHalfStatistics.h:82
casacore::LocationType
std::pair< Int64, Int64 > LocationType
Definition: StatisticsTypes.h:55
casacore::FitToHalfStatistics::_unweightedStats
void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const MaskIterator &maskBegin, uInt maskStride, const DataRanges &ranges, Bool isInclude)
casacore::FitToHalfStatistics::clone
virtual StatisticsAlgorithm< CASA_STATP > * clone() const
Clone this instance.
casacore::FitToHalfStatisticsData::CMEAN
@ CMEAN
Definition: FitToHalfStatisticsData.h:42
casacore::FitToHalfStatistics::getMinMax
virtual void getMinMax(AccumType &mymin, AccumType &mymax)
scan the dataset(s) that have been added, and find the min and max.
casacore::FitToHalfStatistics::getNPts
uInt64 getNPts()
scan the dataset(s) that have been added, and find the number of good points.
casacore::uInt64
unsigned long long uInt64
Definition: aipsxtype.h:39
casacore::FitToHalfStatistics::_unweightedStats
void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride, const DataRanges &ranges, Bool isInclude)
no weights, no mask