PR 41963 (number taken from the test file name below).

execute_cse_reciprocals: only swap the call that defines ARG1 for its
reciprocal builtin when every immediate use of ARG1 is an RDIV_EXPR
assignment with ARG1 as the divisor (and not also the dividend); then
turn all of those divisions into multiplications, not just the one
that triggered the transformation.

Note: leading whitespace of the diff lines was reconstructed; apply with
"patch -l" if the context does not match the tree exactly.

Index: tree-ssa-math-opts.c
===================================================================
--- tree-ssa-math-opts.c (revision 153979)
+++ tree-ssa-math-opts.c (working copy)
@@ -528,7 +528,9 @@ execute_cse_reciprocals (void)
                           || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
                     {
                       enum built_in_function code;
-                      bool md_code;
+                      bool md_code, fail;
+                      imm_use_iterator ui;
+                      use_operand_p use_p;
 
                       code = DECL_FUNCTION_CODE (fndecl);
                       md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
@@ -537,12 +539,34 @@ execute_cse_reciprocals (void)
                       if (!fndecl)
                         continue;
 
+                      /* Check that all uses of the SSA name are divisions,
+                         otherwise replacing the defining statement will do
+                         the wrong thing.  */
+                      fail = false;
+                      FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
+                        {
+                          gimple stmt2 = USE_STMT (use_p);
+                          if (!is_gimple_assign (stmt2)
+                              || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
+                              || gimple_assign_rhs1 (stmt2) == arg1
+                              || gimple_assign_rhs2 (stmt2) != arg1)
+                            {
+                              fail = true;
+                              break;
+                            }
+                        }
+                      if (fail)
+                        continue;
+
                       gimple_call_set_fndecl (stmt1, fndecl);
                       update_stmt (stmt1);
 
-                      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
-                      fold_stmt_inplace (stmt);
-                      update_stmt (stmt);
+                      FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
+                        {
+                          gimple_assign_set_rhs_code (stmt, MULT_EXPR);
+                          fold_stmt_inplace (stmt);
+                          update_stmt (stmt);
+                        }
                     }
                 }
             }
Index: testsuite/gcc.target/i386/pr41963.c
===================================================================
--- testsuite/gcc.target/i386/pr41963.c (revision 0)
+++ testsuite/gcc.target/i386/pr41963.c (revision 0)
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -mrecip" } */
+#include <math.h>
+
+extern float sqrtf(float);
+
+static __attribute__((noinline)) void f (float *dst, float *src)
+{
+  int i, j;
+  for (i = 0; i < 2; i++)
+    {
+      float len;
+      dst[0] = src[0];
+      dst[1] = src[1];
+      len = sqrtf (dst[0] * dst[0] + dst[1] * dst[1]);
+      if (len > 0.5f)
+        {
+          len = 1.0f / len;
+          dst[0] *= len;
+          dst[1] *= len;
+        }
+    }
+}
+
+extern void abort (void);
+
+int main()
+{
+  float dst[2], src[2];
+  src[0] = 2.0f;
+  src[1] = 5.0f;
+  f (dst, src);
+  if (fabsf (dst[0] * dst[0] + dst[1] * dst[1] - 1.0f) > 0.01f)
+    abort ();
+  return 0;
+}