This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][match-and-simplify] More flexible 'for', polish match-builtins.pd
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 10 Sep 2014 13:55:21 +0200 (CEST)
- Subject: [PATCH][match-and-simplify] More flexible 'for', polish match-builtins.pd
- Authentication-results: sourceware.org; auth=none
This completes match-builtin.pd by handling all builtin variants
in all present patterns. That requires a little more flexibility
in the 'for' handling to support, for example:
/* Optimize sqrt(expN(x)) = expN(x*0.5). */
(for expfn (BUILT_IN_EXP10F BUILT_IN_EXP10 BUILT_IN_EXP10L
BUILT_IN_POW10F BUILT_IN_POW10 BUILT_IN_POW10L
BUILT_IN_EXPF BUILT_IN_EXP BUILT_IN_EXPL
BUILT_IN_EXP2F BUILT_IN_EXP2 BUILT_IN_EXP2L)
SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
(simplify
(SQRT (expfn @0))
(expfn (mult @0 { build_real (type, dconsthalf); }))))
without having to repeat the SQRT substitutions four times
to match the length of the expfn substitution list. Shorter
substitution vectors are now simply repeated as needed (but all lengths
are required to be a multiple of the smallest one, just as a sanity check).
The patch also fixes the cabs() patterns and completes them
from fold_builtin_cabs. Similarly, it fixes the x * pow (x, c) -> pow (x,
c+1) pattern to match the one in fold-const.c, but in addition requires
-fno-math-errno (errno differences can occur - this seems to be
a common issue with all math builtin foldings that are
conditionalized only on flag_unsafe_math_optimizations...).
Committed.
Richard.
2014-09-10 Richard Biener <rguenther@suse.de>
* genmatch.c (parse_for): Allow more flexible replacement counts.
* match-builtin.pd: Fix initial patterns and complete them
from the source functions. Properly handle all builtin variants.
Index: gcc/genmatch.c
===================================================================
--- gcc/genmatch.c (revision 215057)
+++ gcc/genmatch.c (working copy)
@@ -2414,7 +2414,7 @@ parse_for (cpp_reader *r, source_locatio
vec<const char *> user_ids = vNULL;
vec< vec<const char *> > opers_vec = vNULL;
const cpp_token *token;
- unsigned n_opers = 0;
+ unsigned min_n_opers = 0, max_n_opers = 0;
while (1)
{
@@ -2445,13 +2445,29 @@ parse_for (cpp_reader *r, source_locatio
opers.safe_push (oper);
}
+ token = expect (r, CPP_CLOSE_PAREN);
+ if (opers.length () == 0)
+ fatal_at (token, "A user-defined identifier must have at least one substitution");
+ if (opers_vec.length () == 0)
+ {
+ min_n_opers = opers.length ();
+ max_n_opers = opers.length ();
+ }
+ else
+ {
+ if (opers.length () % min_n_opers != 0
+ && min_n_opers % opers.length () != 0)
+ fatal_at (token, "All user-defined identifiers must have a "
+ "multiple number of operator substitutions of the "
+ "smallest number of substitutions");
+ if (opers.length () < min_n_opers)
+ min_n_opers = opers.length ();
+ else if (opers.length () > max_n_opers)
+ max_n_opers = opers.length ();
+ }
+
opers_vec.safe_push (opers);
- if (n_opers == 0)
- n_opers = opers.length ();
- else if (n_opers != opers.length ())
- fatal_at (token, "All user-defined identifiers must have same number of operator substitutions");
- eat_token (r, CPP_CLOSE_PAREN);
- }
+ }
if (user_ids.length () == 0)
fatal_at (token, "for requires at least one user-defined identifier");
@@ -2474,12 +2490,8 @@ parse_for (cpp_reader *r, source_locatio
{
simplify *s = for_simplifiers[ix];
- for (unsigned j = 0; j < n_opers; ++j)
+ for (unsigned j = 0; j < max_n_opers; ++j)
{
- vec<const char *> opers = vNULL;
- for (unsigned i = 0; i < opers_vec.length (); ++i)
- opers.safe_push (opers_vec[i][j]);
-
operand *match_op = s->match;
operand *result_op = s->result;
vec<if_or_with> ifexpr_vec = vNULL;
@@ -2489,16 +2501,17 @@ parse_for (cpp_reader *r, source_locatio
for (unsigned i = 0; i < n_ids; ++i)
{
- match_op = replace_id (match_op, user_ids[i], opers[i]);
- result_op = replace_id (result_op, user_ids[i], opers[i]);
+ const char *oper = opers_vec[i][j % opers_vec[i].length ()];
+ match_op = replace_id (match_op, user_ids[i], oper);
+ result_op = replace_id (result_op, user_ids[i], oper);
for (unsigned k = 0; k < s->ifexpr_vec.length (); ++k)
- ifexpr_vec[k].cexpr = replace_id (ifexpr_vec[k].cexpr, user_ids[i], opers[i]);
+ ifexpr_vec[k].cexpr = replace_id (ifexpr_vec[k].cexpr, user_ids[i], oper);
}
- simplify *ns = new simplify (s->name, match_op, s->match_location,
- result_op, s->result_location, ifexpr_vec);
- simplifiers.safe_push (ns);
+ simplify *ns = new simplify (s->name, match_op, s->match_location,
+ result_op, s->result_location, ifexpr_vec);
+ simplifiers.safe_push (ns);
}
}
}
Index: gcc/match-builtin.pd
===================================================================
--- gcc/match-builtin.pd (revision 215011)
+++ gcc/match-builtin.pd (working copy)
@@ -18,29 +18,44 @@ along with GCC; see the file COPYING3.
<http://www.gnu.org/licenses/>. */
-/* ??? For math builtins we fail to properly repeat patterns for
- all FP type kinds (sqrtf, sqrt, sqrtl). And we fail to provide
- a mechanism to iterate two ops in lock-step like
- (for fn1 in sqrt sqrtf sqrtl and fn2 in pow powf powl ...)
- if we were to do that repetition semi-manually.
- We could also automagically use the type of the expr to
- always do mathfn_built_in at code-gen time and always
- automagically iterate over kinds (but that's bogus for
- things like (convert (BUILT_IN_SQRT @0)) -> (BUILT_IN_SQRTF @0). */
-
-
-/* One builtin function to builtin function. */
-(simplify
- (BUILT_IN_CABS (complex:c @0 real_zerop))
+/* From fold_builtin_cabs. */
+/* If either part is zero, cabs is fabs of the other. */
+(for CABS (BUILT_IN_CABSF BUILT_IN_CABS BUILT_IN_CABSL)
+ (simplify
+ (CABS (complex:c @0 real_zerop))
(abs @0))
-/* One builtin function to expr. */
-(simplify
- (BUILT_IN_CABS (complex @0 @0))
- (mult (abs @0) { build_real (TREE_TYPE (@0), real_value_truncate (TYPE_MODE (TREE_TYPE (@0)), dconst_sqrt2 ())); }))
-/* One nested fn. */
-(simplify
- (mult:c (BUILT_IN_POW @0 @1) @0)
- (BUILT_IN_POW @0 (PLUS_EXPR @1 { build_one_cst (TREE_TYPE (@1)); })))
+ /* cabs(x+xi) -> fabs(x)*sqrt(2). */
+ (if (flag_unsafe_math_optimizations)
+ (simplify
+ (CABS (complex @0 @0))
+ (mult (abs @0) { build_real (TREE_TYPE (@0), real_value_truncate (TYPE_MODE (TREE_TYPE (@0)), dconst_sqrt2 ())); })))
+ /* Optimize cabs(-z) and cabs(conj(z)) as cabs(z). */
+ (for op (negate conj)
+ (simplify
+ (CABS (op @0))
+ (CABS @0))))
+/* Don't do this when optimizing for size. */
+(if (flag_unsafe_math_optimizations && optimize_function_for_speed_p (cfun))
+ (for CABS (BUILT_IN_CABSF BUILT_IN_CABS BUILT_IN_CABSL)
+ SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
+ (simplify
+ (CABS @0)
+ (SQRT (plus
+ /* ??? There is no way to CSE here. We'd need to support
+ expression captures here, like with
+ (mult (realpart@1 @0) @1) */
+ (mult (realpart @0) (realpart @0))
+ (mult (imagpart @0) (imagpart @0)))))))
+
+/* From fold_binary. */
+/* Optimize x*pow(x,c) as pow(x,c+1). */
+(if (flag_unsafe_math_optimizations
+ /* ??? fold-const.c does not check for flag_errno_math. */
+ && !flag_errno_math)
+ (for POW (BUILT_IN_POW BUILT_IN_POWF BUILT_IN_POWL)
+ (simplify
+ (mult:c (POW @0 REAL_CST@1) @0)
+ (POW @0 (plus @1 { build_one_cst (TREE_TYPE (@1)); })))))
/* From fold_builtin_fabs and fold_builtin_abs. */
/* Fold a call to fabs, fabsf or fabsl, to abs, labs, llabs or imaxabs. */
@@ -50,14 +65,14 @@ along with GCC; see the file COPYING3.
(abs @0)))
/* From fold_builtin_pow. */
-/* Optimize pow(1.0,y) = 1.0. */
-(simplify
- (BUILT_IN_POW real_onep@0 @1)
- @0)
-
(for POW (BUILT_IN_POWF BUILT_IN_POW BUILT_IN_POWL)
SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
CBRT (BUILT_IN_CBRTF BUILT_IN_CBRT BUILT_IN_CBRTL)
+ /* Optimize pow(1.0,y) = 1.0. */
+ (simplify
+ (POW real_onep@0 @1)
+ @0)
+
(simplify
(POW @0 REAL_CST@1)
(with { REAL_VALUE_TYPE c = TREE_REAL_CST (@1); }
@@ -80,14 +95,14 @@ along with GCC; see the file COPYING3.
= real_value_truncate (TYPE_MODE (type), dconst_third ()); }
(if (flag_unsafe_math_optimizations
&& REAL_VALUES_EQUAL (c, dconstroot))
- (CBRT @0))))))
+ (CBRT @0)))))
-/* Strip sign ops from even integer powers.
- ??? The code in builtins.c manages to perform this recursively
- through the whole expression in arg0 of pow. */
-(for sgnop (abs negate)
+ /* Strip sign ops from even integer powers.
+ ??? The code in builtins.c manages to perform this recursively
+ through the whole expression in arg0 of pow. */
+ (for sgnop (abs negate)
(simplify
- (BUILT_IN_POW (sgnop @0) REAL_CST@1)
+ (POW (sgnop @0) REAL_CST@1)
(with
{
REAL_VALUE_TYPE c = TREE_REAL_CST (@1);
@@ -98,19 +113,26 @@ along with GCC; see the file COPYING3.
(if (real_identical (&c, &cint)
&& (n & 1) == 0
&& flag_unsafe_math_optimizations)
- (BUILT_IN_POW @0 @1)))))
+ (POW @0 @1))))))
/* From fold_builtin_sqrt. */
(if (flag_unsafe_math_optimizations)
/* Optimize sqrt(expN(x)) = expN(x*0.5). */
- (for expfn (BUILT_IN_EXP10 BUILT_IN_POW10 BUILT_IN_EXP BUILT_IN_EXP2)
+ (for expfn (BUILT_IN_EXP10F BUILT_IN_EXP10 BUILT_IN_EXP10L
+ BUILT_IN_POW10F BUILT_IN_POW10 BUILT_IN_POW10L
+ BUILT_IN_EXPF BUILT_IN_EXP BUILT_IN_EXPL
+ BUILT_IN_EXP2F BUILT_IN_EXP2 BUILT_IN_EXP2L)
+ SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
(simplify
- (BUILT_IN_SQRT (expfn @0))
+ (SQRT (expfn @0))
(expfn (mult @0 { build_real (type, dconsthalf); }))))
/* Optimize sqrt(Nroot(x)) -> pow(x,1/(2*N)). */
- (for rootfn (BUILT_IN_SQRT BUILT_IN_CBRT)
+ (for rootfn (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL
+ BUILT_IN_CBRTF BUILT_IN_CBRT BUILT_IN_CBRTL)
+ SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
+ POW (BUILT_IN_POWF BUILT_IN_POW BUILT_IN_POWL)
(simplify
- (BUILT_IN_SQRT (rootfn @0))
+ (SQRT (rootfn @0))
(with
{ REAL_VALUE_TYPE dconstroot;
if (BUILTIN_SQRT_P (rootfn)) dconstroot = dconsthalf;
@@ -118,8 +140,10 @@ along with GCC; see the file COPYING3.
/* Adjust for the outer root. */
SET_REAL_EXP (&dconstroot, REAL_EXP (&dconstroot) - 1);
dconstroot = real_value_truncate (TYPE_MODE (type), dconstroot); }
- (BUILT_IN_POW @0 { build_real (type, dconstroot); }))))
+ (POW @0 { build_real (type, dconstroot); }))))
/* Optimize sqrt(pow(x,y)) = pow(|x|,y*0.5). */
- (simplify
- (BUILT_IN_SQRT (BUILT_IN_POW @0 @1))
- (BUILT_IN_POW (abs @0) (mult @1 { build_real (TREE_TYPE (@1), dconsthalf); }))))
+ (for SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
+ POW (BUILT_IN_POWF BUILT_IN_POW BUILT_IN_POWL)
+ (simplify
+ (SQRT (POW @0 @1))
+ (POW (abs @0) (mult @1 { build_real (TREE_TYPE (@1), dconsthalf); })))))