When debugging some code we discovered that some float math appears to be incorrect at times. Because of the size of the values we were using we should have really been using doubles. The values that get assigned differ if the assignment is inside a for loop or external to the loop. This 'problem' has been demonstrated in gcc 3.4.3 and 4.0 on the x86 platform. It does not show up when cross-compiled for the ARM core. The x86 gcc packages used were fc3 distribution rpm packages. Note: Compiling this sample code under Visual Studio on Windows demonstrates similar issues when compiled as debug code. When compiled as release code it does not have issues. ------------------------ The 'problem' has been reduced to the following example code: #include "stdio.h" const int GOOD_A = -85911104; const int GOOD_B = 42537964; const int BAD_A = -85908672; const int BAD_B = 42539240; class QPoint { public: QPoint() { m_x = 0; m_y = 0; } QPoint( int x, int y ) { m_x = x; m_y = y; } int x() const { return ( m_x ); } int y() const { return ( m_y ); } protected: int m_x; int m_y; }; class QPointArray { public: QPointArray( int size ) { m_points = new QPoint[size]; } const QPoint& Point( int idx ) { return ( m_points[idx] ); } void SetPoint( int idx, int x, int y ) { m_points[idx] = QPoint( x, y ); } protected: QPoint *m_points; }; int m_points = 28; int main(int argc, char *argv[]) { QPointArray m_pt_array( m_points ); m_pt_array.SetPoint( 0, -85911106, 42537898 ); m_pt_array.SetPoint( 1, -85910960, 42537964 ); m_pt_array.SetPoint( 2, -85910328, 42538390 ); m_pt_array.SetPoint( 3, -85908832, 42538685 ); m_pt_array.SetPoint( 4, -85908717, 42538755 ); m_pt_array.SetPoint( 5, -85908717, 42539009 ); m_pt_array.SetPoint( 6, -85908676, 42539131 ); m_pt_array.SetPoint( 7, -85908585, 42539238 ); m_pt_array.SetPoint( 8, -85908469, 42539331 ); m_pt_array.SetPoint( 9, -85908386, 42539440 ); m_pt_array.SetPoint( 10, -85908361, 42539565 ); m_pt_array.SetPoint( 11, -85908531, 42540442 ); 
m_pt_array.SetPoint( 12, -85908610, 42540555 ); m_pt_array.SetPoint( 13, -85908713, 42540656 ); m_pt_array.SetPoint( 14, -85908849, 42540733 ); m_pt_array.SetPoint( 15, -85909315, 42540895 ); m_pt_array.SetPoint( 16, -85909479, 42540928 ); m_pt_array.SetPoint( 17, -85911521, 42541069 ); m_pt_array.SetPoint( 18, -85911792, 42541054 ); m_pt_array.SetPoint( 19, -85911877, 42540895 ); m_pt_array.SetPoint( 20, -85911996, 42540731 ); m_pt_array.SetPoint( 21, -85912216, 42540613 ); m_pt_array.SetPoint( 22, -85912609, 42540335 ); m_pt_array.SetPoint( 23, -85912778, 42540178 ); m_pt_array.SetPoint( 24, -85912884, 42539993 ); m_pt_array.SetPoint( 25, -85912924, 42539795 ); m_pt_array.SetPoint( 26, -85912974, 42538217 ); m_pt_array.SetPoint( 27, -85911106, 42537898 ); float area = 0.0; for ( int i = 0; i < m_points; i++ ) { QPoint cur = m_pt_array.Point( i ); QPoint next = m_pt_array.Point( ( i + 1 ) % m_points ); float f = (float)cur.x() * (float)next.y(); float af = cur.x(); float bf = next.y(); float ff = af * bf; area -= f; printf( "%d * %d = %f or %f\n", cur.x(), next.y(), f, ff ); area += (float)cur.y() * next.x(); } return 0; } ------------------------------------------- The output produced is as follows: -85911106 * 42537964 = -3654483519209472.000000 or -3654483519209472.000000 -85910960 * 42538390 = -3654513852416000.000000 or -3654514120851456.000000 -85910328 * 42538685 = -3654512510238720.000000 or -3654512241803264.000000 -85908832 * 42538755 = -3654454796615680.000000 or -3654454796615680.000000 -85908717 * 42539009 = -3654471708049408.000000 or -3654471708049408.000000 -85908717 * 42539131 = -3654482177032192.000000 or -3654482445467648.000000 -85908676 * 42539238 = -3654489424789504.000000 or -3654489693224960.000000 -85908585 * 42539331 = -3654493719756800.000000 or -3654493719756800.000000 -85908469 * 42539440 = -3654498283159552.000000 or -3654498283159552.000000 -85908386 * 42539565 = -3654505262481408.000000 or -3654505262481408.000000 -85908361 * 
42540442 = -3654579619102720.000000 or -3654579350667264.000000 -85908531 * 42540555 = -3654596530536448.000000 or -3654596530536448.000000 -85908610 * 42540656 = -3654608610131968.000000 or -3654608610131968.000000 -85908713 * 42540733 = -3654619615985664.000000 or -3654619615985664.000000 -85908849 * 42540895 = -3654639211773952.000000 or -3654639480209408.000000 -85909315 * 42540928 = -3654661760352256.000000 or -3654661760352256.000000 -85909479 * 42541069 = -3654681087705088.000000 or -3654681087705088.000000 -85911521 * 42541054 = -3654766718615552.000000 or -3654766718615552.000000 -85911792 * 42540895 = -3654764571131904.000000 or -3654764571131904.000000 -85911877 * 42540731 = -3654754102149120.000000 or -3654754370584576.000000 -85911996 * 42540613 = -3654749270310912.000000 or -3654749001875456.000000 -85912216 * 42540335 = -3654734506360832.000000 or -3654734506360832.000000 -85912609 * 42540178 = -3654737727586304.000000 or -3654737459150848.000000 -85912778 * 42539993 = -3654728869216256.000000 or -3654728869216256.000000 -85912884 * 42539795 = -3654716252749824.000000 or -3654716521185280.000000 -85912924 * 42538217 = -3654582840328192.000000 or -3654582571892736.000000 -85912974 * 42537898 = -3654557338959872.000000 or -3654557338959872.000000 -85911106 * 42537898 = -3654477882064896.000000 or -3654477613629440.000000 ------------------------------ The sample code was built with the following make file: floatcast: floatcast.cpp g++ -o floatcast floatcast.cpp -------------------------------
*** This bug has been marked as a duplicate of 323 ***
You realize that your results differ only in the 8th digit -- which just so happens to be the precision you get from floats, right? W.
We did come to the realization that we should be using doubles to store the values, because the values we were comparing were really too large for floats. However, the fact that rearranging the code or compiling on one platform vs another could cause different results seemed to be wrong behaviour.