MeanEstimate.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. #include "MeanEstimate.h"
  2. #include <cassert>
  3. #include <cmath>
  4. #include <limits>
  5. #include <algorithm>
  6. CDebugOutput::CDebugOutput()
  7. : Singleton< CDebugOutput >(),
  8. m_file( NULL ),
  9. currentIndex( 0 ),
  10. m_minIndex( -1 ),
  11. m_maxIndex( -1 )
  12. {
  13. }
  14. void CDebugOutput::Init()
  15. {
  16. m_diameter.clear();
  17. m_estimate2.clear();
  18. m_distensibility.clear();
  19. m_distensibility20.clear();
  20. m_minIndex = -1;
  21. m_maxIndex = -1;
  22. }
  23. void CDebugOutput::OpenFile()
  24. {
  25. m_file = fopen( "MathOutput.xls", "wt");
  26. }
  27. void CDebugOutput::WriteToFile()
  28. {
  29. OpenFile();
  30. //ASSERT( m_distensibility20.GetSize() == m_diameter.GetSize() );
  31. assert( m_diameter.size() == m_estimate2.size() );
  32. int distSize = (int)m_distensibility20.size();
  33. double mean=0;
  34. {//mean
  35. int valide=0;
  36. double value=0;
  37. for(int i=0; i<distSize; i++)
  38. {
  39. std::map< int, double >::iterator it = m_distensibility.find( i );
  40. if(it != m_distensibility.end())
  41. {
  42. value= it->second;
  43. if(value > 0)
  44. {
  45. mean+=value;
  46. valide++;
  47. }
  48. }
  49. }
  50. mean /= valide;
  51. }
  52. double diam, esti, dist, dist20;
  53. for(int i=0; i<distSize; i++)
  54. {
  55. std::map< int, double >::iterator it = m_diameter.find( i );
  56. diam = esti = dist = dist20 = -10;
  57. if(it == m_diameter.end())
  58. {
  59. fprintf(m_file, "%d\t%f\t \t \t \t \tNo eim\n",i, mean);
  60. //fprintf(m_file, "%d\t%f\t \t \t \t \tNo eim\n",i, mean);
  61. }
  62. else
  63. {
  64. diam = it->second;
  65. std::map< int, double >::iterator it2 = m_estimate2.find( i );
  66. esti = it2->second;
  67. if ( !CMeanEstimate::IsANumber( esti ))
  68. {
  69. fprintf(m_file, "%d\t%f\t%f\t \t \t \tNo eim within %lf\n",i, mean, diam, esti);
  70. }
  71. else
  72. {
  73. std::map< int, double >::iterator itd = m_distensibility.find( i );
  74. dist = itd->second;
  75. itd = m_distensibility20.find( i );
  76. dist20 = itd->second;
  77. if(dist20 == -2 )
  78. {
  79. fprintf(m_file, "%d\t%f\t%f\t%f\t%f\t \t OutOfBounds\n",i, mean, diam, esti, dist );
  80. }
  81. else
  82. {
  83. fprintf(m_file, "%d\t%f\t%f\t%f\t%f\t%f\t%s\n",i, mean, diam, esti, dist, dist20, (m_minIndex==i? "MIN":(m_maxIndex==i? "MAX":"")) );
  84. }
  85. }
  86. }
  87. }
  88. fclose(m_file);
  89. }
  90. CMeanEstimate::CMeanEstimate()
  91. {
  92. }
  93. CMeanEstimate::~CMeanEstimate(void)
  94. {
  95. }
  96. void CMeanEstimate::RemoveValues(std::vector< double > *A, double value)
  97. {
  98. A->erase( remove_if( A->begin(), A->end(), bind2nd( std::equal_to< double >(), value ) ), A->end() );
  99. /*
  100. double x;
  101. int size = (int)A->size();
  102. int count=0; //number removed;
  103. for(int i=0; i<size; i++)
  104. {
  105. x = A->GetAt(i);
  106. if( x == value )
  107. {
  108. A->RemoveAt(i);
  109. count++;
  110. i--;
  111. size--;
  112. }
  113. }
  114. */
  115. }
  116. struct is_not_a_number : public std::unary_function< double, bool >
  117. {
  118. bool operator() ( double x )
  119. {
  120. return !CMeanEstimate::IsANumber( x );
  121. }
  122. };
  123. void CMeanEstimate::RemoveNanValues(std::vector< double > *A) // Nan = Not A Number
  124. {
  125. A->erase( remove_if( A->begin(), A->end(), is_not_a_number() ), A->end() );
  126. /*
  127. double x;
  128. int size = (int)A->GetSize();
  129. int count=0; //number removed;
  130. for(int i=0; i<size; i++)
  131. {
  132. x = A->GetAt(i);
  133. if( ! CMeanEstimate::IsANumber(x) )
  134. {
  135. A->RemoveAt(i);
  136. count++;
  137. i--;
  138. size--;
  139. }
  140. }
  141. */
  142. }
  143. template < class Op1, class Op2, class Op3 >
  144. class binary_compose : public std::unary_function< typename Op2::argument_type,
  145. typename Op1::result_type >
  146. {
  147. public:
  148. binary_compose( const Op1& o1, const Op2& o2, const Op3& o3 ) : f1( o1 ),
  149. f2( o2 ),
  150. f3( o3 )
  151. {
  152. }
  153. typename Op1::result_type operator() ( const typename Op2::argument_type& x ) const
  154. {
  155. return f1( f2( x ), f3( x ) );
  156. }
  157. protected:
  158. Op1 f1;
  159. Op2 f2;
  160. Op3 f3;
  161. };
  162. template < class Op1, class Op2, class Op3 >
  163. inline binary_compose< Op1, Op2, Op3 > compose2( const Op1& f1, const Op2& f2, const Op3& f3 )
  164. {
  165. return binary_compose< Op1, Op2, Op3 >( f1, f2, f3 );
  166. }
  167. void CMeanEstimate::RemoveOutOfBoundsValues(std::vector< double > *A, double percent)
  168. {
  169. double mean = GetMeanEstimate(A, percent);
  170. double maxBound = mean * (1+percent);
  171. double minBound = mean * (1-percent);
  172. A->erase( remove_if( A->begin(), A->end(),
  173. compose2( std::logical_or< bool >(),
  174. std::bind2nd( std::less< double >(), minBound ),
  175. std::bind2nd( std::greater< double >(), maxBound ) ) ),
  176. A->end() );
  177. /*
  178. double x=0;
  179. int size = (int)A->GetSize();
  180. int count=0; //number removed;
  181. for(int i=0; i<size; i++)
  182. {
  183. x = A->GetAt(i);
  184. if( x > maxBound || x < minBound )
  185. {
  186. A->RemoveAt(i);
  187. count++;
  188. i--;
  189. size--;
  190. }
  191. }
  192. */
  193. }
  194. void CMeanEstimate::FindMinMaxWithinBoundsValues(std::vector< double > *A, double percent, double &min, double &max)
  195. {
  196. std::vector< double > A2(*A);
  197. RemoveValues(&A2, -1);
  198. RemoveValues(&A2, 0);
  199. RemoveNanValues(&A2);
  200. min = std::numeric_limits<double>::max();
  201. max = std::numeric_limits<double>::min();
  202. double mean = GetMeanEstimate(&A2, percent);
  203. double maxBound = mean * (1+percent);
  204. double minBound = mean * (1-percent);
  205. CDebugOutput::getInstance().m_maxIndex = -1;
  206. CDebugOutput::getInstance().m_minIndex = -1;
  207. double x=0;
  208. int i = 0;
  209. double value=-1;
  210. std::vector< double >::iterator
  211. it = A->begin(),
  212. ie = A->end();
  213. while (it != ie)
  214. {
  215. x = *it;
  216. std::map< int, double >::iterator iv = CDebugOutput::getInstance().m_distensibility.find( i );
  217. assert(iv != CDebugOutput::getInstance().m_distensibility.end());
  218. value = iv->second;
  219. assert( x == value );
  220. if( !CMeanEstimate::IsANumber(x)
  221. || x<=0){
  222. CDebugOutput::getInstance().m_distensibility20.insert( std::make_pair(i, -1 ) );
  223. }else{
  224. if (x > maxBound || x < minBound )
  225. {
  226. CDebugOutput::getInstance().m_distensibility20.insert( std::make_pair(i, -2) );
  227. }else{
  228. CDebugOutput::getInstance().m_distensibility20.insert( std::make_pair(i, x) );
  229. if( x > max ){
  230. CDebugOutput::getInstance().m_maxIndex = i;
  231. max = x;
  232. }else{
  233. if(x < min ){
  234. CDebugOutput::getInstance().m_minIndex = i;
  235. min = x;
  236. }
  237. }
  238. }
  239. }
  240. i++;
  241. ++it;
  242. }
  243. assert(CDebugOutput::getInstance().m_maxIndex > 0 );
  244. assert(CDebugOutput::getInstance().m_minIndex > 0 );
  245. }
  246. void CMeanEstimate::FindMinMax(std::vector< double > *A, double &min, double &max)
  247. {
  248. min = std::numeric_limits<double>::max();
  249. max = std::numeric_limits<double>::min();
  250. //min = max = A->GetAt(0);
  251. double x=0;
  252. std::vector< double >::iterator
  253. it = A->begin(),
  254. ie = A->end();
  255. while ( it != ie )
  256. {
  257. x = *it;
  258. if( x > max ) max = x;
  259. if(x>0 && x < min ) min = x;
  260. ++it;
  261. }
  262. }
  263. double CMeanEstimate::GetMeanEstimate(std::vector< double > *A)
  264. {
  265. return GetMeanEstimate(A, VARIATION_COEF);
  266. }
  267. void CMeanEstimate::startFileOutput(char *filename)
  268. {
  269. CDebugOutput::getInstance().m_file = fopen(filename, "a");
  270. assert(CDebugOutput::getInstance().m_file);
  271. }
  272. void CMeanEstimate::titleFileOutput(char * /*title*/, std::vector< double > * /*A*/, std::vector< double > * /*B*/)
  273. {
  274. // fprintf(m_file, "\n\n%s\n",title);
  275. // fprintf(m_file, "Sizes\n %d \t %d \n\n", A->GetSize(), B->GetSize());
  276. // fprintf(m_file, "Mean\n %f \t %f \n\n", CMeanEstimate::GetMean(A), CMeanEstimate::GetMean(B));
  277. // fprintf(m_file, "StandardDeviation\n %f \t %f \n\n", CMeanEstimate::GetStandardDeviation(A), CMeanEstimate::GetStandardDeviation(B));
  278. }
  279. void CMeanEstimate::titleFileOutput(std::vector< double > *A)
  280. {
  281. fprintf(CDebugOutput::getInstance().m_file, "%f\t%f\t%d\t",GetMean(A),GetMeanEstimate(A), (int)A->size());
  282. // fprintf(m_file, "\n\n%s\n",title);
  283. // fprintf(m_file, "Sizes\n %d \t %d \n\n", A->GetSize(), B->GetSize());
  284. // fprintf(m_file, "Mean\n %f \t %f \n\n", CMeanEstimate::GetMean(A), CMeanEstimate::GetMean(B));
  285. // fprintf(m_file, "StandardDeviation\n %f \t %f \n\n", CMeanEstimate::GetStandardDeviation(A), CMeanEstimate::GetStandardDeviation(B));
  286. }
  287. void CMeanEstimate::writeFileOutput(char* text)
  288. {
  289. fprintf(CDebugOutput::getInstance().m_file,text);
  290. }
  291. void CMeanEstimate::endFileOutput()
  292. {
  293. fprintf(CDebugOutput::getInstance().m_file, "\n");
  294. fclose(CDebugOutput::getInstance().m_file);
  295. }
  296. double CMeanEstimate::GetMeanEstimate(std::vector< double > *A, double similarityCoef)
  297. {
  298. double mean=GetMean(A);
  299. double x=0, sum = 0 ;
  300. int count=0;
  301. //double maxDeviation = mean * similarityCoef;
  302. std::vector< double >::iterator
  303. it = A->begin(),
  304. ie = A->end();
  305. while ( it != ie )
  306. {
  307. x = *it;
  308. //if(maxDeviation> fabs(x-mean)){
  309. if( x*similarityCoef > fabs(x-mean) )
  310. {
  311. sum+=x;
  312. count++;
  313. }
  314. else
  315. {
  316. int xxx=0; xxx++;
  317. }
  318. ++it;
  319. }
  320. // VERIFY(count);
  321. return sum / (double)count;
  322. }
  323. double CMeanEstimate::GetMeanEstimateFileOutput(std::vector< double > *A, double similarityCoef)
  324. {
  325. double mean=GetMean(A);
  326. double x=0, sum = 0 ;
  327. int count=0;
  328. //double maxDeviation = mean * similarityCoef;
  329. std::vector< double >::iterator
  330. it = A->begin(),
  331. ie = A->end();
  332. while ( it != ie )
  333. {
  334. x = *it;
  335. //if(maxDeviation> fabs(x-mean)){
  336. if( x*similarityCoef > fabs(x-mean) )
  337. {
  338. sum+=x;
  339. count++;
  340. }
  341. ++it;
  342. }
  343. //#define OUTPUT
  344. #ifdef OUTPUT
  345. fprintf(m_file, "%d",count);
  346. #endif
  347. //VERIFY(count);
  348. return sum / (double)count;
  349. }
  350. void CMeanEstimate::PrintMeanEstimate(std::vector< double > *A, std::vector< double > *B)
  351. {
  352. PrintMeanEstimate(A,B, VARIATION_COEF);
  353. }
  354. void CMeanEstimate::PrintMeanEstimate(std::vector< double > *A, std::vector< double > *B, double similarityCoef)
  355. {
  356. double mean =GetMean(A);
  357. double meanB=GetMean(B);
  358. double x=0;
  359. int count=0, countB=0;
  360. //fprintf(m_file, "MeanEstimate, coef:%f\n %f \t%f\n\n",similarityCoef, GetMeanEstimate(A,similarityCoef), GetMeanEstimate(B,similarityCoef));
  361. //fprintf(m_file, "AA\t exclu=0\t\tII\t exclu=0\n");
  362. //double maxDeviation = mean * similarityCoef;
  363. //double maxDeviationB = meanB * similarityCoef;
  364. assert( B->size() <= A->size() );
  365. std::vector< double >::iterator
  366. ia = A->begin(),
  367. ae = A->end(),
  368. ib = B->begin();
  369. while ( ia != ae )
  370. {
  371. x = *ia;
  372. //if(maxDeviation > fabs(x-mean) ){
  373. if ( x*similarityCoef > fabs(x-mean) )
  374. {
  375. //sum+=x;
  376. //fprintf(m_file, "%f\t1\t", x);
  377. }
  378. else
  379. {
  380. count++;
  381. //fprintf(m_file, "%f\t0\t", x);
  382. }
  383. x = *ib;
  384. //fprintf(m_file, "\t");
  385. //if(maxDeviationB> fabs(x-meanB) ){
  386. if( x*similarityCoef > fabs(x-meanB) )
  387. {
  388. //sumB+=x;
  389. //fprintf(m_file, "%f\t1\t", x);
  390. }
  391. else
  392. {
  393. countB++;
  394. //fprintf(m_file, "%f\t0\t", x);
  395. }
  396. //fprintf(m_file, "\n");
  397. ++ia;
  398. ++ib;
  399. }
  400. //fprintf(m_file, "\nnombre de points exclus:\t%d\t\t\t%d\n",count, countB );
  401. }
  402. bool CMeanEstimate::IsANumber(double x)
  403. {
  404. return ((x < (double)(std::numeric_limits<double>::max())) &&
  405. (x > (double)(std::numeric_limits<double>::min())));
  406. }
  407. double CMeanEstimate::GetMean(std::vector< double > *A)
  408. {
  409. double sum=0;
  410. double x=0;
  411. std::vector< double >::iterator
  412. it = A->begin(),
  413. ie = A->end();
  414. while ( it != ie )
  415. {
  416. x = *it;
  417. if( !IsANumber(x) )
  418. {
  419. int xxx=0; xxx++;
  420. } else
  421. // if( x == std::numeric_limits<double>::quiet_NaN() ){
  422. // int xxx=0; xxx++;
  423. // }
  424. // if( abs(x) == std::numeric_limits<double>::infinity() ){
  425. // int xxx=0; xxx++;
  426. // }
  427. sum+= x;
  428. ++it;
  429. }
  430. return sum / (double)A->size();
  431. }
  432. double CMeanEstimate::GetStandardDeviation(std::vector< double > *A)
  433. {
  434. return sqrt(GetVariance(A));
  435. }
  436. double CMeanEstimate::GetVariance(std::vector< double > *A)
  437. {
  438. double mean=GetMean(A);
  439. double sum=0, diff=0;
  440. std::vector< double >::iterator
  441. it = A->begin(),
  442. ie = A->end();
  443. while ( it != ie )
  444. {
  445. diff = *it - mean;
  446. sum+= diff*diff;
  447. ++it;
  448. }
  449. return sum / (double)A->size();
  450. }
  451. //void CMeanEstimate::OutputToFile(char* filename, CArray<double> *A, CArray<double> *B)
  452. ////implementation rapide et specifique est repectant peu le principde objet !!
  453. //{
  454. // FILE* file = fopen(filename, "a");
  455. //
  456. // if(file==0){
  457. // ASSERT(file);
  458. // }else{
  459. // fprintf(file, "Mean\n %f \t %f \n", CMeanEstimate::GetMean(A), CMeanEstimate::GetMean(B));
  460. // fprintf(file, "StandardDeviation\n %f \t %f \n", CMeanEstimate::GetStandardDeviation(A), CMeanEstimate::GetStandardDeviation(B));
  461. // fprintf(file, "\nValues:\nAA \tII\n");
  462. // ASSERT(A->GetSize()==B->GetSize());
  463. // for(int i=A->GetSize()-1; i; i--){
  464. // fprintf(file, "%g \t %g \n", A->GetAt(i), B->GetAt(i));
  465. // }
  466. // fprintf(file, "\n\n");
  467. // fclose(file);
  468. // }
  469. //}