MeanEstimate.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548
  1. #include "MeanEstimate.h"
  2. #include <cassert>
  3. #include <cmath>
  4. #include <limits>
  5. #include <algorithm>
  6. CDebugOutput::CDebugOutput()
  7. : Singleton< CDebugOutput >(),
  8. m_file( NULL ),
  9. currentIndex( 0 ),
  10. m_minIndex( -1 ),
  11. m_maxIndex( -1 )
  12. {
  13. }
  14. void CDebugOutput::Init()
  15. {
  16. m_diameter.clear();
  17. m_estimate2.clear();
  18. m_distensibility.clear();
  19. m_distensibility20.clear();
  20. m_minIndex = -1;
  21. m_maxIndex = -1;
  22. }
  23. void CDebugOutput::OpenFile()
  24. {
  25. m_file = fopen( "MathOutput.xls", "wt");
  26. }
  27. void CDebugOutput::WriteToFile()
  28. {
  29. OpenFile();
  30. //ASSERT( m_distensibility20.GetSize() == m_diameter.GetSize() );
  31. assert( m_diameter.size() == m_estimate2.size() );
  32. int distSize = (int)m_distensibility20.size();
  33. double mean=0;
  34. {//mean
  35. int valide=0;
  36. double value=0;
  37. for(int i=0; i<distSize; i++)
  38. {
  39. std::map< int, double >::iterator it = m_distensibility.find( i );
  40. if(it != m_distensibility.end())
  41. {
  42. value= it->second;
  43. if(value > 0)
  44. {
  45. mean+=value;
  46. valide++;
  47. }
  48. }
  49. }
  50. mean /= valide;
  51. }
  52. double diam, esti, dist, dist20;
  53. for(int i=0; i<distSize; i++)
  54. {
  55. std::map< int, double >::iterator it = m_diameter.find( i );
  56. diam = esti = dist = dist20 = -10;
  57. if(it == m_diameter.end())
  58. {
  59. fprintf(m_file, "%d\t%f\t \t \t \t \tNo eim\n",i, mean);
  60. //fprintf(m_file, "%d\t%f\t \t \t \t \tNo eim\n",i, mean);
  61. }
  62. else
  63. {
  64. diam = it->second;
  65. std::map< int, double >::iterator it2 = m_estimate2.find( i );
  66. esti = it2->second;
  67. if ( !CMeanEstimate::IsANumber( esti ))
  68. {
  69. fprintf(m_file, "%d\t%f\t%f\t \t \t \tNo eim within %lf\n",i, mean, diam, esti);
  70. }
  71. else
  72. {
  73. std::map< int, double >::iterator itd = m_distensibility.find( i );
  74. dist = itd->second;
  75. itd = m_distensibility20.find( i );
  76. dist20 = itd->second;
  77. if(dist20 == -2 )
  78. {
  79. fprintf(m_file, "%d\t%f\t%f\t%f\t%f\t \t OutOfBounds\n",i, mean, diam, esti, dist );
  80. }
  81. else
  82. {
  83. fprintf(m_file, "%d\t%f\t%f\t%f\t%f\t%f\t%s\n",i, mean, diam, esti, dist, dist20, (m_minIndex==i? "MIN":(m_maxIndex==i? "MAX":"")) );
  84. }
  85. }
  86. }
  87. }
  88. fclose(m_file);
  89. }
  90. CMeanEstimate::CMeanEstimate()
  91. {
  92. }
  93. CMeanEstimate::~CMeanEstimate(void)
  94. {
  95. }
  96. void CMeanEstimate::RemoveValues(std::vector< double > *A, double value)
  97. {
  98. A->erase( remove_if( A->begin(), A->end(), bind2nd( std::equal_to< double >(), value ) ), A->end() );
  99. /*
  100. double x;
  101. int size = (int)A->size();
  102. int count=0; //number removed;
  103. for(int i=0; i<size; i++)
  104. {
  105. x = A->GetAt(i);
  106. if( x == value )
  107. {
  108. A->RemoveAt(i);
  109. count++;
  110. i--;
  111. size--;
  112. }
  113. }
  114. */
  115. }
  116. struct is_not_a_number : public std::unary_function< double, bool >
  117. {
  118. bool operator() ( double x )
  119. {
  120. return !CMeanEstimate::IsANumber( x );
  121. }
  122. };
  123. void CMeanEstimate::RemoveNanValues(std::vector< double > *A) // Nan = Not A Number
  124. {
  125. A->erase( remove_if( A->begin(), A->end(), is_not_a_number() ), A->end() );
  126. /*
  127. double x;
  128. int size = (int)A->GetSize();
  129. int count=0; //number removed;
  130. for(int i=0; i<size; i++)
  131. {
  132. x = A->GetAt(i);
  133. if( ! CMeanEstimate::IsANumber(x) )
  134. {
  135. A->RemoveAt(i);
  136. count++;
  137. i--;
  138. size--;
  139. }
  140. }
  141. */
  142. }
  143. template < class Op1, class Op2, class Op3 >
  144. class binary_compose : public std::unary_function< typename Op2::argument_type,
  145. typename Op1::result_type >
  146. {
  147. public:
  148. binary_compose( const Op1& o1, const Op2& o2, const Op3& o3 ) : f1( o1 ),
  149. f2( o2 ),
  150. f3( o3 )
  151. {
  152. }
  153. typename Op1::result_type operator() ( const typename Op2::argument_type& x ) const
  154. {
  155. return f1( f2( x ), f3( x ) );
  156. }
  157. protected:
  158. Op1 f1;
  159. Op2 f2;
  160. Op3 f3;
  161. };
  162. template < class Op1, class Op2, class Op3 >
  163. inline binary_compose< Op1, Op2, Op3 > compose2( const Op1& f1, const Op2& f2, const Op3& f3 )
  164. {
  165. return binary_compose< Op1, Op2, Op3 >( f1, f2, f3 );
  166. }
  167. void CMeanEstimate::RemoveOutOfBoundsValues(std::vector< double > *A, double percent)
  168. {
  169. double mean = GetMeanEstimate(A, percent);
  170. double maxBound = mean * (1+percent);
  171. double minBound = mean * (1-percent);
  172. A->erase( remove_if( A->begin(), A->end(),
  173. compose2( std::logical_or< bool >(),
  174. std::bind2nd( std::less< double >(), minBound ),
  175. std::bind2nd( std::greater< double >(), maxBound ) ) ),
  176. A->end() );
  177. /*
  178. double x=0;
  179. int size = (int)A->GetSize();
  180. int count=0; //number removed;
  181. for(int i=0; i<size; i++)
  182. {
  183. x = A->GetAt(i);
  184. if( x > maxBound || x < minBound )
  185. {
  186. A->RemoveAt(i);
  187. count++;
  188. i--;
  189. size--;
  190. }
  191. }
  192. */
  193. }
  194. void CMeanEstimate::FindMinMaxWithinBoundsValues(std::vector< double > *A, double percent, double &min, double &max)
  195. {
  196. std::vector< double > A2(*A);
  197. RemoveValues(&A2, -1);
  198. RemoveValues(&A2, 0);
  199. RemoveNanValues(&A2);
  200. // min = std::numeric_limits<double>::max();
  201. // max = std::numeric_limits<double>::min();
  202. min = INT_MAX;
  203. max = INT_MIN;
  204. double mean = GetMeanEstimate(&A2, percent);
  205. double maxBound = mean * (1+percent);
  206. double minBound = mean * (1-percent);
  207. CDebugOutput::getInstance().m_maxIndex = -1;
  208. CDebugOutput::getInstance().m_minIndex = -1;
  209. double x=0;
  210. int i = 0;
  211. double value=-1;
  212. std::vector< double >::iterator
  213. it = A->begin(),
  214. ie = A->end();
  215. while (it != ie)
  216. {
  217. x = *it;
  218. std::map< int, double >::iterator iv = CDebugOutput::getInstance().m_distensibility.find( i );
  219. assert(iv != CDebugOutput::getInstance().m_distensibility.end());
  220. value = iv->second;
  221. assert( x == value );
  222. if( !CMeanEstimate::IsANumber(x)
  223. || x<=0){
  224. CDebugOutput::getInstance().m_distensibility20.insert( std::make_pair(i, -1 ) );
  225. }else{
  226. if (x > maxBound || x < minBound )
  227. {
  228. CDebugOutput::getInstance().m_distensibility20.insert( std::make_pair(i, -2) );
  229. }else{
  230. CDebugOutput::getInstance().m_distensibility20.insert( std::make_pair(i, x) );
  231. if( x > max ){
  232. CDebugOutput::getInstance().m_maxIndex = i;
  233. max = x;
  234. }else{
  235. if(x < min ){
  236. CDebugOutput::getInstance().m_minIndex = i;
  237. min = x;
  238. }
  239. }
  240. }
  241. }
  242. i++;
  243. ++it;
  244. }
  245. assert(CDebugOutput::getInstance().m_maxIndex > 0 );
  246. assert(CDebugOutput::getInstance().m_minIndex > 0 );
  247. }
  248. void CMeanEstimate::FindMinMax(std::vector< double > *A, double &min, double &max)
  249. {
  250. // min = std::numeric_limits<double>::max();
  251. // max = std::numeric_limits<double>::min();
  252. min = INT_MAX;
  253. max = INT_MIN;
  254. //min = max = A->GetAt(0);
  255. double x=0;
  256. std::vector< double >::iterator
  257. it = A->begin(),
  258. ie = A->end();
  259. while ( it != ie )
  260. {
  261. x = *it;
  262. if( x > max ) max = x;
  263. if(x>0 && x < min ) min = x;
  264. ++it;
  265. }
  266. }
  267. double CMeanEstimate::GetMeanEstimate(std::vector< double > *A)
  268. {
  269. return GetMeanEstimate(A, VARIATION_COEF);
  270. }
  271. void CMeanEstimate::startFileOutput(char *filename)
  272. {
  273. CDebugOutput::getInstance().m_file = fopen(filename, "a");
  274. assert(CDebugOutput::getInstance().m_file);
  275. }
  276. void CMeanEstimate::titleFileOutput(char * /*title*/, std::vector< double > * /*A*/, std::vector< double > * /*B*/)
  277. {
  278. // fprintf(m_file, "\n\n%s\n",title);
  279. // fprintf(m_file, "Sizes\n %d \t %d \n\n", A->GetSize(), B->GetSize());
  280. // fprintf(m_file, "Mean\n %f \t %f \n\n", CMeanEstimate::GetMean(A), CMeanEstimate::GetMean(B));
  281. // fprintf(m_file, "StandardDeviation\n %f \t %f \n\n", CMeanEstimate::GetStandardDeviation(A), CMeanEstimate::GetStandardDeviation(B));
  282. }
  283. void CMeanEstimate::titleFileOutput(std::vector< double > *A)
  284. {
  285. fprintf(CDebugOutput::getInstance().m_file, "%f\t%f\t%d\t",GetMean(A),GetMeanEstimate(A), (int)A->size());
  286. // fprintf(m_file, "\n\n%s\n",title);
  287. // fprintf(m_file, "Sizes\n %d \t %d \n\n", A->GetSize(), B->GetSize());
  288. // fprintf(m_file, "Mean\n %f \t %f \n\n", CMeanEstimate::GetMean(A), CMeanEstimate::GetMean(B));
  289. // fprintf(m_file, "StandardDeviation\n %f \t %f \n\n", CMeanEstimate::GetStandardDeviation(A), CMeanEstimate::GetStandardDeviation(B));
  290. }
  291. void CMeanEstimate::writeFileOutput(char* text)
  292. {
  293. fprintf(CDebugOutput::getInstance().m_file,text);
  294. }
  295. void CMeanEstimate::endFileOutput()
  296. {
  297. fprintf(CDebugOutput::getInstance().m_file, "\n");
  298. fclose(CDebugOutput::getInstance().m_file);
  299. }
  300. double CMeanEstimate::GetMeanEstimate(std::vector< double > *A, double similarityCoef)
  301. {
  302. double mean=GetMean(A);
  303. double x=0, sum = 0 ;
  304. int count=0;
  305. //double maxDeviation = mean * similarityCoef;
  306. std::vector< double >::iterator
  307. it = A->begin(),
  308. ie = A->end();
  309. while ( it != ie )
  310. {
  311. x = *it;
  312. //if(maxDeviation> fabs(x-mean)){
  313. if( x*similarityCoef > fabs(x-mean) )
  314. {
  315. sum+=x;
  316. count++;
  317. }
  318. else
  319. {
  320. int xxx=0; xxx++;
  321. }
  322. ++it;
  323. }
  324. // VERIFY(count);
  325. return sum / (double)count;
  326. }
  327. double CMeanEstimate::GetMeanEstimateFileOutput(std::vector< double > *A, double similarityCoef)
  328. {
  329. double mean=GetMean(A);
  330. double x=0, sum = 0 ;
  331. int count=0;
  332. //double maxDeviation = mean * similarityCoef;
  333. std::vector< double >::iterator
  334. it = A->begin(),
  335. ie = A->end();
  336. while ( it != ie )
  337. {
  338. x = *it;
  339. //if(maxDeviation> fabs(x-mean)){
  340. if( x*similarityCoef > fabs(x-mean) )
  341. {
  342. sum+=x;
  343. count++;
  344. }
  345. ++it;
  346. }
  347. //#define OUTPUT
  348. #ifdef OUTPUT
  349. fprintf(m_file, "%d",count);
  350. #endif
  351. //VERIFY(count);
  352. return sum / (double)count;
  353. }
  354. void CMeanEstimate::PrintMeanEstimate(std::vector< double > *A, std::vector< double > *B)
  355. {
  356. PrintMeanEstimate(A,B, VARIATION_COEF);
  357. }
  358. void CMeanEstimate::PrintMeanEstimate(std::vector< double > *A, std::vector< double > *B, double similarityCoef)
  359. {
  360. double mean =GetMean(A);
  361. double meanB=GetMean(B);
  362. double x=0;
  363. int count=0, countB=0;
  364. //fprintf(m_file, "MeanEstimate, coef:%f\n %f \t%f\n\n",similarityCoef, GetMeanEstimate(A,similarityCoef), GetMeanEstimate(B,similarityCoef));
  365. //fprintf(m_file, "AA\t exclu=0\t\tII\t exclu=0\n");
  366. //double maxDeviation = mean * similarityCoef;
  367. //double maxDeviationB = meanB * similarityCoef;
  368. assert( B->size() <= A->size() );
  369. std::vector< double >::iterator
  370. ia = A->begin(),
  371. ae = A->end(),
  372. ib = B->begin();
  373. while ( ia != ae )
  374. {
  375. x = *ia;
  376. //if(maxDeviation > fabs(x-mean) ){
  377. if ( x*similarityCoef > fabs(x-mean) )
  378. {
  379. //sum+=x;
  380. //fprintf(m_file, "%f\t1\t", x);
  381. }
  382. else
  383. {
  384. count++;
  385. //fprintf(m_file, "%f\t0\t", x);
  386. }
  387. x = *ib;
  388. //fprintf(m_file, "\t");
  389. //if(maxDeviationB> fabs(x-meanB) ){
  390. if( x*similarityCoef > fabs(x-meanB) )
  391. {
  392. //sumB+=x;
  393. //fprintf(m_file, "%f\t1\t", x);
  394. }
  395. else
  396. {
  397. countB++;
  398. //fprintf(m_file, "%f\t0\t", x);
  399. }
  400. //fprintf(m_file, "\n");
  401. ++ia;
  402. ++ib;
  403. }
  404. //fprintf(m_file, "\nnombre de points exclus:\t%d\t\t\t%d\n",count, countB );
  405. }
  406. bool CMeanEstimate::IsANumber(double x)
  407. {
  408. // return ((x < (double)(std::numeric_limits<double>::max())) &&
  409. // (x > (double)(std::numeric_limits<double>::min())));
  410. return ((x < (double)(INT_MAX)) &&
  411. (x > (double)(INT_MIN)));
  412. }
  413. double CMeanEstimate::GetMean(std::vector< double > *A)
  414. {
  415. double sum=0;
  416. double x=0;
  417. std::vector< double >::iterator
  418. it = A->begin(),
  419. ie = A->end();
  420. while ( it != ie )
  421. {
  422. x = *it;
  423. if( !IsANumber(x) )
  424. {
  425. int xxx=0; xxx++;
  426. } else
  427. // if( x == std::numeric_limits<double>::quiet_NaN() ){
  428. // int xxx=0; xxx++;
  429. // }
  430. // if( abs(x) == std::numeric_limits<double>::infinity() ){
  431. // int xxx=0; xxx++;
  432. // }
  433. sum+= x;
  434. ++it;
  435. }
  436. return sum / (double)A->size();
  437. }
  438. double CMeanEstimate::GetStandardDeviation(std::vector< double > *A)
  439. {
  440. return sqrt(GetVariance(A));
  441. }
  442. double CMeanEstimate::GetVariance(std::vector< double > *A)
  443. {
  444. double mean=GetMean(A);
  445. double sum=0, diff=0;
  446. std::vector< double >::iterator
  447. it = A->begin(),
  448. ie = A->end();
  449. while ( it != ie )
  450. {
  451. diff = *it - mean;
  452. sum+= diff*diff;
  453. ++it;
  454. }
  455. return sum / (double)A->size();
  456. }
  457. //void CMeanEstimate::OutputToFile(char* filename, CArray<double> *A, CArray<double> *B)
  458. ////implementation rapide et specifique est repectant peu le principde objet !!
  459. //{
  460. // FILE* file = fopen(filename, "a");
  461. //
  462. // if(file==0){
  463. // ASSERT(file);
  464. // }else{
  465. // fprintf(file, "Mean\n %f \t %f \n", CMeanEstimate::GetMean(A), CMeanEstimate::GetMean(B));
  466. // fprintf(file, "StandardDeviation\n %f \t %f \n", CMeanEstimate::GetStandardDeviation(A), CMeanEstimate::GetStandardDeviation(B));
  467. // fprintf(file, "\nValues:\nAA \tII\n");
  468. // ASSERT(A->GetSize()==B->GetSize());
  469. // for(int i=A->GetSize()-1; i; i--){
  470. // fprintf(file, "%g \t %g \n", A->GetAt(i), B->GetAt(i));
  471. // }
  472. // fprintf(file, "\n\n");
  473. // fclose(file);
  474. // }
  475. //}