This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Builtins handling in IVOPT


Hi,

This patch works on the intrinsic calls handling issue in IVOPT mentioned here:
http://gcc.gnu.org/ml/gcc-patches/2010-10/msg01295.html

In find_interesting_uses_stmt, it changes

arg = expr
__builtin_xxx (arg)

to

arg = expr;
tmp = addr_expr (mem_ref(arg));
__builtin_xxx (tmp, ...)

So mem_ref(arg) could be handled by find_interesting_uses_address, and
an iv use of USE_ADDRESS type will be generated for expr, then a TMR
will be generated for expr in rewrite_use_address. Expand pass is
changed accordingly to keep the complex addressing mode not to be
splitted for cse.

With the patch we can handle the motivational case below.

#include <x86intrin.h>
extern __m128i arr[], d[];
void test (void)
{
    unsigned int b;
    for (b = 0; b < 1000; b += 2) {
      __m128i *p = (__m128i *)(&d[b]);
      __m128i a = _mm_load_si128(&arr[4*b+3]);
      __m128i v = _mm_loadu_si128(p);
      v = _mm_xor_si128(v, a);
      _mm_storeu_si128(p, v);
    }
}

gcc-r204792 Without the patch:
.L2:
        movdqu  (%rax), %xmm0
        subq    $-128, %rdx
        addq    $32, %rax
        pxor    -128(%rdx), %xmm0
        movups  %xmm0, -32(%rax)
        cmpq    $arr+64048, %rdx
        jne     .L2

gcc-r204792 With the patch:
.L2:
        movdqu  d(%rax), %xmm0
        pxor    arr+48(,%rax,4), %xmm0
        addq    $32, %rax
        movups  %xmm0, d-32(%rax)
        cmpq    $16000, %rax
        jne     .L2

Following things to be addressed later:
1. TER needs to be extended to handle the case when TMR is csed.

2. For more complex cases to work, besides this patch, we also need to
tune the AVG_LOOP_NITER, which is now set to 5, and it limits
induction variables merging a lot. Increasing the parameter to a
larger one could remove some induction variable in critical loop in
some our benchmarks. reg pressure estimation may also need to be
tuned. I will address it in a separate patch.

3. Now the information about which param of a builtin is of memory
reference type is simply listed as a switch-case in
builtin_has_mem_ref_p and ix86_builtin_has_mem_ref_p. This is not
ideal but there is no infrastructure to describe it in existing
implementation. More detailed information such as parameter and call
side-effect will be important for more precise alias and may worth
some work. Maybe the refinement about this patch could be done after
that.

regression and bootstrap pass on x86_64-linux-gnu. ok for trunk?

Thanks,
Wei.

2013-11-20  Wei Mi  <wmi@google.com>

        * expr.c (expand_expr_addr_expr_1): Not to split TMR.
        (expand_expr_real_1): Ditto.
        * targhooks.c (default_builtin_has_mem_ref_p): Default
        builtin.
        * tree-ssa-loop-ivopts.c (struct iv): Add field builtin_mem_param.
        (alloc_iv): Ditto.
        (remove_unused_ivs): Ditto.
        (builtin_has_mem_ref_p): New function.
        (find_interesting_uses_stmt): Special handling of builtins.
        * gimple-expr.c (is_gimple_address): Add handling of TMR.
        * gimple-expr.h (is_gimple_addressable): Ditto.
        * config/i386/i386.c (ix86_builtin_has_mem_ref_p): New target hook.
        (ix86_atomic_assign_expand_fenv): Ditto.
        (ix86_expand_special_args_builtin): Special handling of TMR for
        builtin.
        * target.def (builtin_has_mem_ref_p): New hook.
        * doc/tm.texi.in: Ditto.
        * doc/tm.texi: Generated.

2013-11-20  Wei Mi  <wmi@google.com>

        * gcc.dg/tree-ssa/ivopt_5.c: New test.

Index: expr.c
===================================================================
--- expr.c      (revision 204792)
+++ expr.c      (working copy)
@@ -7467,7 +7467,19 @@ expand_expr_addr_expr_1 (tree exp, rtx t
          tem = fold_build_pointer_plus (tem, TREE_OPERAND (exp, 1));
        return expand_expr (tem, target, tmode, modifier);
       }
+    case TARGET_MEM_REF:
+      {
+       int old_cse_not_expected;
+       addr_space_t as
+         = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (exp, 0))));

+       result = addr_for_mem_ref (exp, as, true);
+       old_cse_not_expected = cse_not_expected;
+       cse_not_expected = true;
+       result = memory_address_addr_space (tmode, result, as);
+       cse_not_expected = old_cse_not_expected;
+        return result;
+      }
     case CONST_DECL:
       /* Expand the initializer like constants above.  */
       result = XEXP (expand_expr_constant (DECL_INITIAL (exp),
@@ -9526,9 +9538,13 @@ expand_expr_real_1 (tree exp, rtx target
          = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (exp, 0))));
        enum insn_code icode;
        unsigned int align;
+       int old_cse_not_expected;

        op0 = addr_for_mem_ref (exp, as, true);
+       old_cse_not_expected = cse_not_expected;
+       cse_not_expected = true;
        op0 = memory_address_addr_space (mode, op0, as);
+       cse_not_expected = old_cse_not_expected;
        temp = gen_rtx_MEM (mode, op0);
        set_mem_attributes (temp, exp, 0);
        set_mem_addr_space (temp, as);
Index: targhooks.c
===================================================================
--- targhooks.c (revision 204792)
+++ targhooks.c (working copy)
@@ -566,6 +566,13 @@ default_builtin_reciprocal (unsigned int
 }

 bool
+default_builtin_has_mem_ref_p (int built_in_function ATTRIBUTE_UNUSED,
+                              int i ATTRIBUTE_UNUSED)
+{
+  return false;
+}
+
+bool
 hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false (
        cumulative_args_t ca ATTRIBUTE_UNUSED,
        enum machine_mode mode ATTRIBUTE_UNUSED,
Index: tree-ssa-loop-ivopts.c
===================================================================
--- tree-ssa-loop-ivopts.c      (revision 204792)
+++ tree-ssa-loop-ivopts.c      (working copy)
@@ -135,6 +135,8 @@ struct iv
   tree ssa_name;       /* The ssa name with the value.  */
   bool biv_p;          /* Is it a biv?  */
   bool have_use_for;   /* Do we already have a use for it?  */
+  bool builtin_mem_param; /* Used as param of a builtin, so it could not be
+                            removed by remove_unused_ivs.  */
   unsigned use_id;     /* The identifier in the use if it is the case.  */
 };

@@ -952,6 +954,7 @@ alloc_iv (tree base, tree step)
   iv->step = step;
   iv->biv_p = false;
   iv->have_use_for = false;
+  iv->builtin_mem_param = false;
   iv->use_id = 0;
   iv->ssa_name = NULL_TREE;

@@ -1874,13 +1877,36 @@ find_invariants_stmt (struct ivopts_data
     }
 }

+/* Find whether the Ith param of the BUILTIN is a mem
+   reference. If I is -1, it returns whether the BUILTIN
+   contains any mem reference type param.  */
+
+static bool
+builtin_has_mem_ref_p (gimple builtin, int i)
+{
+  tree fndecl = gimple_call_fndecl (builtin);
+  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+    {
+      switch (DECL_FUNCTION_CODE (fndecl))
+       {
+       case BUILT_IN_PREFETCH:
+         if (i == -1 || i == 0)
+            return true;
+       }
+    }
+  else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
+    return targetm.builtin_has_mem_ref_p ((int) DECL_FUNCTION_CODE
(fndecl), i);
+
+  return false;
+}
+
 /* Finds interesting uses of induction variables in the statement STMT.  */

 static void
 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
 {
   struct iv *iv;
-  tree op, *lhs, *rhs;
+  tree op, *lhs, *rhs, callee;
   ssa_op_iter iter;
   use_operand_p use_p;
   enum tree_code code;
@@ -1937,6 +1963,74 @@ find_interesting_uses_stmt (struct ivopt

         call (memory).  */
     }
+  else if (is_gimple_call (stmt)
+          && (callee = gimple_call_fndecl (stmt))
+          && is_builtin_fn (callee)
+          && builtin_has_mem_ref_p (stmt, -1))
+    {
+      size_t i;
+      for (i = 0; i < gimple_call_num_args (stmt); i++)
+       {
+         if (builtin_has_mem_ref_p (stmt, i))
+           {
+             gimple def, g;
+             gimple_seq seq = NULL;
+             tree type, mem, addr, rhs;
+             tree *arg = gimple_call_arg_ptr (stmt, i);
+              location_t loc = gimple_location (stmt);
+             gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+
+              if (TREE_CODE (*arg) != SSA_NAME)
+               continue;
+
+             def = SSA_NAME_DEF_STMT (*arg);
+             gcc_assert (gimple_code (def) == GIMPLE_PHI
+                         || is_gimple_assign (def));
+             /* Suppose we have the case:
+                  arg = expr;
+                  call (arg)
+                If the expr is not like the form: &MEM(...), change it to:
+                  arg = expr;
+                  tmp = &MEM(arg);
+                  call(tmp);
+                then try to find interesting uses address in MEM(arg).  */
+             if (is_gimple_assign (def)
+                 && (rhs = gimple_assign_rhs1(def))
+                 && TREE_CODE (rhs) == ADDR_EXPR
+                 && REFERENCE_CLASS_P (TREE_OPERAND (rhs, 0)))
+               {
+                 iv = get_iv (data, *arg);
+                 if (iv && !iv->builtin_mem_param)
+                   iv->builtin_mem_param = true;
+
+                 find_interesting_uses_address (data, def,
+                                                &TREE_OPERAND (rhs, 0));
+               }
+             else
+               {
+                 mem = build2 (MEM_REF, TREE_TYPE (*arg), *arg,
+                               build_int_cst (TREE_TYPE (*arg), 0));
+                 type = build_pointer_type (TREE_TYPE (*arg));
+                 addr = build1 (ADDR_EXPR, type, mem);
+                 g = gimple_build_assign_with_ops (ADDR_EXPR,
+                                                   make_ssa_name (type, NULL),
+                                                   addr, NULL);
+                 gimple_call_set_arg (stmt, i, gimple_assign_lhs (g));
+                 update_stmt (stmt);
+                 gimple_set_location (g, loc);
+                 gimple_seq_add_stmt_without_update (&seq, g);
+                 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
+                 find_interesting_uses_address (data, g,
&TREE_OPERAND (addr, 0));
+               }
+           }
+         else
+           {
+             tree arg = gimple_call_arg (stmt, i);
+             find_interesting_uses_op (data, arg);
+           }
+       }
+      return;
+    }

   if (gimple_code (stmt) == GIMPLE_PHI
       && gimple_bb (stmt) == data->current_loop->header)
@@ -6507,10 +6601,10 @@ remove_unused_ivs (struct ivopts_data *d
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
+         && !info->iv->builtin_mem_param
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
-
          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
Index: gimple-expr.c
===================================================================
--- gimple-expr.c       (revision 204792)
+++ gimple-expr.c       (working copy)
@@ -633,7 +633,8 @@ is_gimple_address (const_tree t)
       op = TREE_OPERAND (op, 0);
     }

-  if (CONSTANT_CLASS_P (op) || TREE_CODE (op) == MEM_REF)
+  if (CONSTANT_CLASS_P (op) || TREE_CODE (op) == MEM_REF
+      || TREE_CODE (op) == TARGET_MEM_REF)
     return true;

   switch (TREE_CODE (op))
Index: gimple-expr.h
===================================================================
--- gimple-expr.h       (revision 204792)
+++ gimple-expr.h       (working copy)
@@ -122,7 +122,8 @@ static inline bool
 is_gimple_addressable (tree t)
 {
   return (is_gimple_id (t) || handled_component_p (t)
-         || TREE_CODE (t) == MEM_REF);
+         || TREE_CODE (t) == MEM_REF
+         || TREE_CODE (t) == TARGET_MEM_REF);
 }

 /* Return true if T is a valid gimple constant.  */
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 204792)
+++ config/i386/i386.c  (working copy)
@@ -29639,6 +29639,50 @@ ix86_init_mmx_sse_builtins (void)
     }
 }

+/* Return whether the Ith param of the BUILTIN_FUNCTION
+   is a memory reference. If I == -1, return whether the
+   BUILTIN_FUNCTION contains any memory reference param.  */
+
+static bool
+ix86_builtin_has_mem_ref_p (int builtin_function, int i)
+{
+  switch ((enum ix86_builtins) builtin_function)
+    {
+    /* LOAD.  */
+    case IX86_BUILTIN_LOADHPS:
+    case IX86_BUILTIN_LOADLPS:
+    case IX86_BUILTIN_LOADHPD:
+    case IX86_BUILTIN_LOADLPD:
+      if (i == -1 || i == 1)
+       return true;
+      break;
+    case IX86_BUILTIN_LOADUPS:
+    case IX86_BUILTIN_LOADUPD:
+    case IX86_BUILTIN_LOADDQU:
+    case IX86_BUILTIN_LOADUPD256:
+    case IX86_BUILTIN_LOADUPS256:
+    case IX86_BUILTIN_LOADDQU256:
+    case IX86_BUILTIN_LDDQU256:
+      if (i == -1 || i == 0)
+       return true;
+      break;
+    /* STORE.  */
+    case IX86_BUILTIN_STOREHPS:
+    case IX86_BUILTIN_STORELPS:
+    case IX86_BUILTIN_STOREUPS:
+    case IX86_BUILTIN_STOREUPD:
+    case IX86_BUILTIN_STOREDQU:
+    case IX86_BUILTIN_STOREUPD256:
+    case IX86_BUILTIN_STOREUPS256:
+    case IX86_BUILTIN_STOREDQU256:
+      if (i == -1 || i == 0)
+       return true;
+    default:
+      break;
+    }
+  return false;
+}
+
 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
    to return a pointer to VERSION_DECL if the outcome of the expression
    formed by PREDICATE_CHAIN is true.  This function will be called during
@@ -32525,7 +32569,13 @@ ix86_expand_special_args_builtin (const
          if (i == memory)
            {
              /* This must be the memory operand.  */
-             op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
+
+             /* We expect the builtin could be expanded to rtl with memory
+                operand and proper addressing mode will be kept as specified
+                in TARGET_MEM_REF.  */
+             if (!(TREE_CODE (arg) == ADDR_EXPR
+                   && TREE_CODE (TREE_OPERAND (arg, 0)) == TARGET_MEM_REF))
+               op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
              op = gen_rtx_MEM (mode, op);
              gcc_assert (GET_MODE (op) == mode
                          || GET_MODE (op) == VOIDmode);
@@ -43737,6 +43787,9 @@ ix86_atomic_assign_expand_fenv (tree *ho
 #undef TARGET_BUILTIN_RECIPROCAL
 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

+#undef TARGET_BUILTIN_HAS_MEM_REF_P
+#define TARGET_BUILTIN_HAS_MEM_REF_P ix86_builtin_has_mem_ref_p
+
 #undef TARGET_ASM_FUNCTION_EPILOGUE
 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

Index: doc/tm.texi
===================================================================
--- doc/tm.texi (revision 204792)
+++ doc/tm.texi (working copy)
@@ -709,6 +709,12 @@ If a target implements string objects th
 If a target implements string objects then this hook should should
provide a facility to check the function arguments in @var{args_list}
against the format specifiers in @var{format_arg} where the type of
@var{format_arg} is one recognized as a valid string reference type.
 @end deftypefn

+@deftypefn {Target Hook} bool TARGET_BUILTIN_HAS_MEM_REF_P (int
@var{builtin_function}, int @var{i})
+This hook return whether the @var{i}th param of the @var{builtin_function}
+is a memory reference. If @var{i} is -1, return whether the
@var{builtin_function}
+contains any memory reference type param.
+@end deftypefn
+
 @deftypefn {Target Hook} void TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE (void)
 This target function is similar to the hook @code{TARGET_OPTION_OVERRIDE}
 but is called when the optimize level is changed via an attribute or
Index: doc/tm.texi.in
===================================================================
--- doc/tm.texi.in      (revision 204792)
+++ doc/tm.texi.in      (working copy)
@@ -697,6 +697,8 @@ should use @code{TARGET_HANDLE_C_OPTION}

 @hook TARGET_CHECK_STRING_OBJECT_FORMAT_ARG

+@hook TARGET_BUILTIN_HAS_MEM_REF_P
+
 @hook TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE

 @defmac C_COMMON_OVERRIDE_OPTIONS
Index: target.def
===================================================================
--- target.def  (revision 204792)
+++ target.def  (working copy)
@@ -1742,6 +1742,14 @@ HOOK_VECTOR_END (vectorize)
 #undef HOOK_PREFIX
 #define HOOK_PREFIX "TARGET_"

+DEFHOOK
+(builtin_has_mem_ref_p,
+ "This hook return whether the @var{i}th param of the @var{builtin_function}\n\
+is a memory reference. If @var{i} is -1, return whether the
@var{builtin_function}\n\
+contains any memory reference type param.",
+ bool, (int builtin_function, int i),
+ default_builtin_has_mem_ref_p)
+
 /* Allow target specific overriding of option settings after options have
   been changed by an attribute or pragma or when it is reset at the
   end of the code affected by an attribute or pragma.  */
Index: testsuite/gcc.dg/tree-ssa/ivopt_5.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ivopt_5.c (revision 0)
+++ testsuite/gcc.dg/tree-ssa/ivopt_5.c (revision 0)
@@ -0,0 +1,21 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
+
+/* Make sure only one iv is selected after IVOPT.  */
+
+#include <x86intrin.h>
+extern __m128i arr[], d[];
+void test (void)
+{
+    unsigned int b;
+    for (b = 0; b < 1000; b += 2) {
+      __m128i *p = (__m128i *)(&d[b]);
+      __m128i a = _mm_load_si128(&arr[4*b+3]);
+      __m128i v = _mm_loadu_si128(p);
+      v = _mm_xor_si128(v, a);
+      _mm_storeu_si128(p, v);
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]