Standard Portable Library: src/Sample.cpp Source File

00001 /*
00002  *   This file is part of the Standard Portable Library (SPL).
00003  *
00004  *   SPL is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   SPL is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with SPL.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 #if defined(_WIN32) || defined(_WIN64)
00018 #include <spl/configwin32.h>
00019 #else
00020 #include <spl/autoconf/config.h>
00021 #endif
00022 
00023 #include <spl/io/Directory.h>
00024 #include <sys/types.h>
00025 
00026 #include <math.h>
00027 #ifdef HAVE_FLOAT_H
00028 #include <float.h>
00029 #endif
00030 #ifdef HAVE_LIMITS_H
00031 #include <limits.h>
00032 #endif
00033 
00034 #include <spl/Debug.h>
00035 #include <spl/math/Math.h>
00036 #include <spl/math/Sample.h>
00037 #include <spl/math/SampleTest.h>
00038 
00039 /*
00040  * Insert sort, low to high
00041  */
00042 static void _ArrayInsert(double *a, const int alen, const double d)
00043 {
00044         for (int x = alen - 2; x >= 0; x--)
00045         {
00046                 if( d > a[x] )
00047                 {
00048                         // move values down
00049                         a[x + 1] = a[x];
00050                 }
00051                 else
00052                 {
00053                         // insert the value
00054                         a[x + 1] = d;
00055                         return;
00056                 }
00057         }
00058         // d was greater than all the values in the array
00059         a[0] = d;
00060 }
00061 
00062 void Sample::Recalc()
00063 {
00064         double sum = 0;
00065         int x;
00066 
00067         for (x = 0; x < m_n; x++)
00068         {
00069                 double val = m_data[x];
00070                 if (val < m_min)
00071                 {
00072                         m_min = val;
00073                 }
00074                 if (val > m_max)
00075                 {
00076                         m_max = val;
00077                 }
00078                 sum += val;
00079         }
00080         m_mean = sum / m_n;
00081 
00082         sum = 0;
00083         double mean2 = m_mean * m_mean;
00084         for (x = 0; x < m_n; x++)
00085         {
00086                 double val = m_data[x];
00087                 sum += (val * val) - mean2;
00088                 
00089         m_variance = m_variance + ((val - m_mean) * (val - m_mean));
00090         m_averageDeviation = m_averageDeviation + Math::Abs(val - m_mean);
00091         }
00092         m_ssd = Math::Sqrt(sum / m_n);
00093         m_variance /= m_n;
00094         m_averageDeviation /= m_n;      
00095 }
00096 
00097 Sample::Sample(List<double> *list)
00098 : m_min(DBL_MAX), m_max(DBL_MIN), m_mean(0), m_ssd(0)
00099 {
00100         if ( NULL == (m_data = (double *)malloc(sizeof(double) * list->Count())) )
00101         {
00102                 throw OutOfMemoryException();
00103         }
00104         m_n = list->Count();
00105         List<double>::Iterator iter = list->Begin();
00106         int x = 0;
00107         while(iter.Next())
00108         {
00109                 m_data[x++] = iter.Current();
00110         }
00111         Recalc();
00112 }
00113 
00114 Sample::Sample(Vector<double> *list)
00115 : m_min(DBL_MAX), m_max(DBL_MIN), m_mean(0), m_ssd(0)
00116 {
00117         if ( NULL == (m_data = (double *)malloc(sizeof(double) * list->Count())) )
00118         {
00119                 throw OutOfMemoryException();
00120         }
00121         m_n = list->Count();
00122         for (int x = 0; x < m_n; x++)
00123         {
00124                 m_data[x] = list->ElementAt(x);
00125         }
00126         Recalc();
00127 }
00128 
00129 Sample::Sample(const double *list, const int count)
00130 : m_min(DBL_MAX), m_max(DBL_MIN), m_mean(0), m_ssd(0)
00131 {
00132         if ( NULL == (m_data = (double *)malloc(sizeof(double) * count)) )
00133         {
00134                 throw OutOfMemoryException();
00135         }
00136         m_n = count;
00137         for (int x = 0; x < m_n; x++)
00138         {
00139                 m_data[x] = list[x];
00140         }
00141         Recalc();
00142 }
00143 
00144 Sample::Sample (const Sample& s)
00145 : m_min(s.m_min), m_max(s.m_max), m_mean(s.m_mean), m_ssd(s.m_ssd), m_variance(s.m_variance), m_averageDeviation(s.m_averageDeviation), m_n(s.m_n)
00146 {
00147         if ( NULL == (m_data = (double *)malloc(sizeof(double) * m_n)) )
00148         {
00149                 throw OutOfMemoryException();
00150         }
00151         for ( int x = 0; x < m_n; x++ )
00152         {
00153                 m_data[x] = s.m_data[x];
00154         }
00155 }
00156 
00157 Sample::~Sample()
00158 {
00159         free(m_data);
00160 }
00161 
00162 Sample& Sample::operator =(const Sample& s)
00163 {
00164         m_min = s.m_min;
00165         m_max = s.m_max;
00166         m_mean = s.m_mean;
00167         m_ssd = s.m_ssd;
00168         m_variance = s.m_variance;
00169         m_averageDeviation = s.m_averageDeviation;
00170         m_n = s.m_n;
00171 
00172         if ( NULL == (m_data = (double *)malloc(sizeof(double) * m_n)) )
00173         {
00174                 throw OutOfMemoryException();
00175         }
00176         for ( int x = 0; x < m_n; x++ )
00177         {
00178                 m_data[x] = s.m_data[x];
00179         }
00180 
00181         return *this;
00182 }
00183 
00184 double Sample::AverageVariance() const
00185 {
00186         double ep = 0;
00187         double s;
00188         double var = 0;
00189 
00190         for (int j = 0; j < m_n; j++)
00191         {
00192                 s = m_data[j] - m_mean;
00193                 ep = ep + s;
00194                 var = var + s * s;
00195         }
00196 
00197         return (var - ep * ep / m_n) / (m_n - 1);
00198 }
00199 
00200 double Sample::Skew() const
00201 {
00202         double sum = 0;
00203         for ( int x = 0; x < m_n; x++ )
00204         {
00205         sum += ((m_data[x] - m_mean) / m_ssd) * ((m_data[x] - m_mean) / m_ssd) * ((m_data[x] - m_mean) / m_ssd);
00206         }
00207         return sum / m_n;
00208 }
00209 
00210 double Sample::Kurtosis() const
00211 {
00212         double sum = 0;
00213         for ( int x = 0; x < m_n; x++ )
00214         {
00215         sum = sum + ((m_data[x] - m_mean) / m_ssd) * ((m_data[x] - m_mean) / m_ssd) * ((m_data[x] - m_mean) / m_ssd) * ((m_data[x] - m_mean) / m_ssd);
00216         }
00217     return sum / m_n - 3;
00218 }
00219 
00220 double Sample::Median() const
00221 {
00222         double *da = new double[m_n];
00223         if ( NULL == da )
00224         {
00225                 throw OutOfMemoryException();
00226         }
00227         
00228         for (int x = 0; x < m_n; x++)
00229         {
00230                 _ArrayInsert(da, m_n, m_data[x]);
00231                 ASSERT_MEM(da, m_n * sizeof(double));
00232         }
00233 #ifdef DEBUG
00234         for ( int i = 0; i < m_n; i++ )
00235         {
00236                 bool found = false;
00237                 for ( int j = 0; j < m_n; j++ )
00238                 {
00239                         ASSERT( (j > i) ? da[j] <= da[i] : da[j] >= da[i] );
00240                         if ( m_data[j] == da[i] )
00241                         {
00242                                 found = true;
00243                                 break;
00244                         }
00245                 }
00246                 ASSERT(found);
00247         }
00248 #endif
00249 
00250         double median = da[m_n / 2];
00251         delete[] da;
00252         return median;
00253 }
00254 
00255 double Sample::ErrorFunction(double x) const
00256 {
00257         if (x < 0)
00258         {
00259                 return 1 + Gammp(0.5, x * x);
00260         }
00261         else
00262         {
00263                 return Gammq(0.5, x * x);
00264         }
00265 }
00266 
00267 double Sample::Gammp(const double a, const double x)
00268 {
00269         if (x < 0 || a <= 0)
00270         {
00271                 throw new InvalidArgumentException("gammp bad, bad args");
00272         }
00273 
00274         if (x < (a + 1))
00275         {
00276                 return Gser(a, x);
00277         }
00278         else
00279         {
00280                 return 1 - Gcf(a, x);
00281         }
00282 }
00283 
00284 double Sample::Gammq(const double a, const double x)
00285 {
00286         if (x < 0 || a <= 0)
00287         {
00288                 throw new InvalidArgumentException("gammp bad, bad args");
00289         }
00290 
00291         if( x < (a + 1) )
00292         {
00293                 return 1 - Gser(a, x);
00294         }
00295         else
00296         {
00297                 return Gcf(a, x);
00298         }
00299 }
00300 
00301 double Sample::Gcf(const double a, const double x)
00302 {
00303         int i;
00304         double gln = SampleTest::Gammaln(a);
00305         double b = x + 1 - a;
00306         double c = 1 / 1E-30;
00307         double d = 1 / b;
00308         double h = d;
00309 
00310         for(i = 0; i < 200; i++)
00311         {
00312                 double an = -i * (1 - a);
00313                 b = b + 2;
00314                 d = an * d + b;
00315                 if ( Math::Abs(d) < 1E-30 )
00316                 {
00317                          d = 1E-30;
00318                 }
00319                 c = b + an / c;
00320                 if (Math::Abs(c) < 1E-30)
00321                 {
00322                         c = 1E-30;
00323                 }
00324                 d = 1 / d;
00325                 double del = d * c;
00326                 h = h * del;
00327                 if (Math::Abs(del) < 0.0000003)
00328                 {
00329                         break;
00330                 }
00331         }
00332         if( i >= 200 )
00333         {
00334                 throw new Exception("gcf failed");
00335         }
00336         return Math::Exp(-x + a * Math::Log(x) - gln) * h;
00337 }
00338 
00339 double Sample::Gser(const double a, const double x)
00340 {
00341         double gln = SampleTest::Gammaln(a);
00342 
00343         if( x <= 0 )
00344         {
00345                 if (x < 0)
00346                 {
00347                         throw new InvalidArgumentException("gser x less than 0 in gser");
00348                 }
00349                 return 0;
00350         }
00351 
00352         double ap = a;
00353         double sum = 1 / a;
00354         double del = sum;
00355 
00356         for (int N = 1; N <= 200; N++)
00357         {
00358                 ap = ap + 1;
00359                 del = del * x / ap;
00360                 sum = sum + del;
00361                 if (Math::Abs(del) < Math::Abs(sum) * 0.0000003 )
00362                 {
00363                         return sum * Math::Exp(-x + a * Math::Log(x) - gln);
00364                 }
00365         }
00366 
00367         throw new Exception( "gser a too large or ITMaX to small" );
00368 }
00369 
00370 Sample Sample::TransformASin() const
00371 {
00372         List<double> vals;
00373 
00374         for( int x = 0; x < m_n; x++ )
00375         {
00376                 vals.Add(Math::Asin(Math::Sqrt(m_data[x])));
00377         }
00378         return Sample(&vals);
00379 }
00380 
00381 Sample Sample::TransformSqrt375() const
00382 {    
00383         List<double> vals;
00384 
00385         for( int x = 0; x < m_n; x++ )
00386         {
00387                 vals.Add(Math::Sqrt(m_data[x] + 0.375));
00388         }
00389         return Sample(&vals);
00390 }
00391 
00392 Sample Sample::TransformLog10() const
00393 {
00394         List<double> vals;
00395 
00396         for( int x = 0; x < m_n; x++ )
00397         {
00398                 vals.Add(Math::Log(m_data[x]) / Math::Log(10.0) + 1);
00399         }
00400         return Sample(&vals);
00401 }
00402 
00403 #ifdef DEBUG
00404 void Sample::ValidateMem() const
00405 {
00406         ASSERT_MEM( m_data, sizeof(double) * m_n );
00407 }
00408 
00409 void Sample::CheckMem() const
00410 {
00411         DEBUG_NOTE_MEM( m_data );
00412 }
00413 #endif