[PATCH] Improve pow (C, x) -> exp (log (C) * x) optimization (PR middle-end/84309, take 2)
Richard Biener
rguenther@suse.de
Tue Feb 13 07:59:00 GMT 2018
On Mon, 12 Feb 2018, Jakub Jelinek wrote:
> On Sat, Feb 10, 2018 at 03:26:46PM +0100, Jakub Jelinek wrote:
> > If use_exp2 is true and (cfun->curr_properties & PROP_gimple_lvec) == 0,
> > don't fold it? Then I guess if we vectorize or slp vectorize the pow
> > as vector pow, we'd need to match.pd it into the exp (log (vec_cst) * x).
>
> Here is an updated patch, that defers it for pow (0x2.0pN, x) until after
> vectorization and adds tree-vect-patterns.c matcher that will handle it
> during vectorization (that one using exp, because we don't have exp2
> vectorized).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok.
Thanks,
Richard.
> 2018-02-12 Jakub Jelinek <jakub@redhat.com>
>
> PR middle-end/84309
> * match.pd (pow(C,x) -> exp(log(C)*x)): Optimize instead into
> exp2(log2(C)*x) if C is a power of 2 and c99 runtime is available.
> * generic-match-head.c (canonicalize_math_after_vectorization_p): New
> inline function.
> * gimple-match-head.c (canonicalize_math_after_vectorization_p): New
> inline function.
> * omp-simd-clone.h: New file.
> * omp-simd-clone.c: Include omp-simd-clone.h.
> (expand_simd_clones): No longer static.
> * tree-vect-patterns.c: Include fold-const-call.h, attribs.h,
> cgraph.h and omp-simd-clone.h.
> (vect_recog_pow_pattern): Optimize pow(C,x) to exp(log(C)*x).
> (vect_recog_widen_shift_pattern): Formatting fix.
> (vect_pattern_recog_1): Don't check optab for calls.
>
> * gcc.dg/pr84309.c: New test.
> * gcc.target/i386/pr84309.c: New test.
>
> --- gcc/match.pd.jj 2018-02-09 19:11:26.910070491 +0100
> +++ gcc/match.pd 2018-02-12 14:15:05.653779352 +0100
> @@ -3992,15 +3992,36 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (logs (pows @0 @1))
> (mult @1 (logs @0))))
>
> - /* pow(C,x) -> exp(log(C)*x) if C > 0. */
> + /* pow(C,x) -> exp(log(C)*x) if C > 0,
> + or if C is a positive power of 2,
> + pow(C,x) -> exp2(log2(C)*x). */
> (for pows (POW)
> exps (EXP)
> logs (LOG)
> + exp2s (EXP2)
> + log2s (LOG2)
> (simplify
> (pows REAL_CST@0 @1)
> - (if (real_compare (GT_EXPR, TREE_REAL_CST_PTR (@0), &dconst0)
> - && real_isfinite (TREE_REAL_CST_PTR (@0)))
> - (exps (mult (logs @0) @1)))))
> + (if (real_compare (GT_EXPR, TREE_REAL_CST_PTR (@0), &dconst0)
> + && real_isfinite (TREE_REAL_CST_PTR (@0)))
> + (with {
> + const REAL_VALUE_TYPE *const value = TREE_REAL_CST_PTR (@0);
> + bool use_exp2 = false;
> + if (targetm.libc_has_function (function_c99_misc)
> + && value->cl == rvc_normal)
> + {
> + REAL_VALUE_TYPE frac_rvt = *value;
> + SET_REAL_EXP (&frac_rvt, 1);
> + if (real_equal (&frac_rvt, &dconst1))
> + use_exp2 = true;
> + }
> + }
> + (if (!use_exp2)
> + (exps (mult (logs @0) @1))
> + /* As libmvec doesn't have a vectorized exp2, defer optimizing
> + this until after vectorization. */
> + (if (canonicalize_math_after_vectorization_p ())
> + (exp2s (mult (log2s @0) @1))))))))
>
> (for sqrts (SQRT)
> cbrts (CBRT)
> --- gcc/generic-match-head.c.jj 2018-01-03 10:19:55.454534005 +0100
> +++ gcc/generic-match-head.c 2018-02-12 14:13:27.088784495 +0100
> @@ -68,3 +68,12 @@ canonicalize_math_p ()
> {
> return true;
> }
> +
> +/* Return true if math operations that are beneficial only after
> + vectorization should be canonicalized. */
> +
> +static inline bool
> +canonicalize_math_after_vectorization_p ()
> +{
> + return false;
> +}
> --- gcc/gimple-match-head.c.jj 2018-01-03 10:19:55.931534081 +0100
> +++ gcc/gimple-match-head.c 2018-02-12 14:14:17.352781873 +0100
> @@ -831,3 +831,12 @@ canonicalize_math_p ()
> {
> return !cfun || (cfun->curr_properties & PROP_gimple_opt_math) == 0;
> }
> +
> +/* Return true if math operations that are beneficial only after
> + vectorization should be canonicalized. */
> +
> +static inline bool
> +canonicalize_math_after_vectorization_p ()
> +{
> + return !cfun || (cfun->curr_properties & PROP_gimple_lvec) != 0;
> +}
> --- gcc/omp-simd-clone.h.jj 2018-02-12 18:11:01.843931808 +0100
> +++ gcc/omp-simd-clone.h 2018-02-12 18:12:13.901948041 +0100
> @@ -0,0 +1,26 @@
> +/* OMP constructs' SIMD clone supporting code.
> +
> + Copyright (C) 2005-2018 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
> +for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3. If not see
> +<http://www.gnu.org/licenses/>. */
> +
> +#ifndef GCC_OMP_SIMD_CLONE_H
> +#define GCC_OMP_SIMD_CLONE_H
> +
> +extern void expand_simd_clones (struct cgraph_node *);
> +
> +#endif /* GCC_OMP_SIMD_CLONE_H */
> --- gcc/omp-simd-clone.c.jj 2018-01-25 16:31:35.464138243 +0100
> +++ gcc/omp-simd-clone.c 2018-02-12 18:10:48.214928742 +0100
> @@ -50,6 +50,7 @@ along with GCC; see the file COPYING3.
> #include "varasm.h"
> #include "stringpool.h"
> #include "attribs.h"
> +#include "omp-simd-clone.h"
>
> /* Return the number of elements in vector type VECTYPE, which is associated
> with a SIMD clone. At present these always have a constant length. */
> @@ -1568,7 +1569,7 @@ simd_clone_adjust (struct cgraph_node *n
> /* If the function in NODE is tagged as an elemental SIMD function,
> create the appropriate SIMD clones. */
>
> -static void
> +void
> expand_simd_clones (struct cgraph_node *node)
> {
> tree attr = lookup_attribute ("omp declare simd",
> --- gcc/tree-vect-patterns.c.jj 2018-01-23 14:48:52.783269685 +0100
> +++ gcc/tree-vect-patterns.c 2018-02-12 18:15:49.730996661 +0100
> @@ -41,6 +41,10 @@ along with GCC; see the file COPYING3.
> #include "builtins.h"
> #include "internal-fn.h"
> #include "case-cfn-macros.h"
> +#include "fold-const-call.h"
> +#include "attribs.h"
> +#include "cgraph.h"
> +#include "omp-simd-clone.h"
>
> /* Pattern recognition functions */
> static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
> @@ -1049,7 +1053,7 @@ vect_recog_pow_pattern (vec<gimple *> *s
> tree *type_out)
> {
> gimple *last_stmt = (*stmts)[0];
> - tree base, exp = NULL;
> + tree base, exp;
> gimple *stmt;
> tree var;
>
> @@ -1060,17 +1064,77 @@ vect_recog_pow_pattern (vec<gimple *> *s
> {
> CASE_CFN_POW:
> CASE_CFN_POWI:
> - base = gimple_call_arg (last_stmt, 0);
> - exp = gimple_call_arg (last_stmt, 1);
> - if (TREE_CODE (exp) != REAL_CST
> - && TREE_CODE (exp) != INTEGER_CST)
> - return NULL;
> break;
>
> default:
> return NULL;
> }
>
> + base = gimple_call_arg (last_stmt, 0);
> + exp = gimple_call_arg (last_stmt, 1);
> + if (TREE_CODE (exp) != REAL_CST
> + && TREE_CODE (exp) != INTEGER_CST)
> + {
> + if (flag_unsafe_math_optimizations
> + && TREE_CODE (base) == REAL_CST
> + && !gimple_call_internal_p (last_stmt))
> + {
> + combined_fn log_cfn;
> + built_in_function exp_bfn;
> + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
> + {
> + case BUILT_IN_POW:
> + log_cfn = CFN_BUILT_IN_LOG;
> + exp_bfn = BUILT_IN_EXP;
> + break;
> + case BUILT_IN_POWF:
> + log_cfn = CFN_BUILT_IN_LOGF;
> + exp_bfn = BUILT_IN_EXPF;
> + break;
> + case BUILT_IN_POWL:
> + log_cfn = CFN_BUILT_IN_LOGL;
> + exp_bfn = BUILT_IN_EXPL;
> + break;
> + default:
> + return NULL;
> + }
> + tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
> + tree exp_decl = builtin_decl_implicit (exp_bfn);
> + /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
> + does that, but if C is a power of 2, we want to use
> + exp2 (log2 (C) * x) in the non-vectorized version, but for
> + vectorization we don't have vectorized exp2. */
> + if (logc
> + && TREE_CODE (logc) == REAL_CST
> + && exp_decl
> + && lookup_attribute ("omp declare simd",
> + DECL_ATTRIBUTES (exp_decl)))
> + {
> + cgraph_node *node = cgraph_node::get_create (exp_decl);
> + if (node->simd_clones == NULL)
> + {
> + if (node->definition)
> + return NULL;
> + expand_simd_clones (node);
> + if (node->simd_clones == NULL)
> + return NULL;
> + }
> + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
> + tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
> + gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
> + new_pattern_def_seq (stmt_vinfo, g);
> + *type_in = TREE_TYPE (base);
> + *type_out = NULL_TREE;
> + tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
> + g = gimple_build_call (exp_decl, 1, def);
> + gimple_call_set_lhs (g, res);
> + return g;
> + }
> + }
> +
> + return NULL;
> + }
> +
> /* We now have a pow or powi builtin function call with a constant
> exponent. */
>
> @@ -1744,8 +1808,8 @@ vect_recog_widen_shift_pattern (vec<gimp
>
> /* Pattern supported. Create a stmt to be used to replace the pattern. */
> var = vect_recog_temp_ssa_var (type, NULL);
> - pattern_stmt =
> - gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
> + pattern_stmt
> + = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
> if (wstmt)
> {
> stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
> @@ -4439,10 +4503,6 @@ vect_pattern_recog_1 (vect_recog_func *r
> }
> else
> {
> - machine_mode vec_mode;
> - enum insn_code icode;
> - optab optab;
> -
> /* Check target support */
> type_in = get_vectype_for_scalar_type (type_in);
> if (!type_in)
> @@ -4456,19 +4516,18 @@ vect_pattern_recog_1 (vect_recog_func *r
> pattern_vectype = type_out;
>
> if (is_gimple_assign (pattern_stmt))
> - code = gimple_assign_rhs_code (pattern_stmt);
> - else
> - {
> - gcc_assert (is_gimple_call (pattern_stmt));
> - code = CALL_EXPR;
> + {
> + enum insn_code icode;
> + code = gimple_assign_rhs_code (pattern_stmt);
> + optab optab = optab_for_tree_code (code, type_in, optab_default);
> + machine_mode vec_mode = TYPE_MODE (type_in);
> + if (!optab
> + || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
> + || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
> + return false;
> }
> -
> - optab = optab_for_tree_code (code, type_in, optab_default);
> - vec_mode = TYPE_MODE (type_in);
> - if (!optab
> - || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
> - || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
> - return false;
> + else
> + gcc_assert (is_gimple_call (pattern_stmt));
> }
>
> /* Found a vectorizable pattern. */
> --- gcc/testsuite/gcc.dg/pr84309.c.jj 2018-02-12 12:24:22.214522183 +0100
> +++ gcc/testsuite/gcc.dg/pr84309.c 2018-02-12 12:24:22.214522183 +0100
> @@ -0,0 +1,14 @@
> +/* PR middle-end/84309 */
> +/* { dg-do run { target c99_runtime } } */
> +/* { dg-options "-O2 -ffast-math" } */
> +
> +int
> +main ()
> +{
> + unsigned long a = 1024;
> + unsigned long b = 16 * 1024;
> + unsigned long c = __builtin_pow (2, (__builtin_log2 (a) + __builtin_log2 (b)) / 2);
> + if (c != 4096)
> + __builtin_abort ();
> + return 0;
> +}
> --- gcc/testsuite/gcc.target/i386/pr84309.c.jj 2018-02-12 18:20:15.819056596 +0100
> +++ gcc/testsuite/gcc.target/i386/pr84309.c 2018-02-12 18:21:00.462066648 +0100
> @@ -0,0 +1,16 @@
> +/* PR middle-end/84309 */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -mavx" } */
> +
> +double pow (double, double) __attribute__((simd));
> +double exp (double) __attribute__((simd));
> +extern double a[1024], b[1024];
> +
> +void
> +foo (void)
> +{
> + for (int i = 0; i < 1024; ++i)
> + a[i] = pow (2.0, b[i]);
> +}
> +
> +/* { dg-final { scan-assembler "_ZGVcN4v_exp" } } */
>
>
> Jakub
>
>
--
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
More information about the Gcc-patches
mailing list