[GOOGLE] Builtins handling in IVOPT

Wei Mi wmi@google.com
Wed Jan 22 22:23:00 GMT 2014


This patch handles the mem access builtins in ivopt. The original
problem described here:
http://gcc.gnu.org/ml/gcc-patches/2013-11/msg02648.html

Bootstrapped and passed regression test. Performance test ok for
plain, fdo and lipo. Ok for google 4.8 branch?

Thanks,
Wei.

--- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/expr.c
2013-12-21 21:16:11.006695060 -0800
+++ gcc/expr.c  2013-12-09 11:16:22.698063077 -0800
@@ -7537,7 +7537,21 @@
          tem = fold_build_pointer_plus (tem, TREE_OPERAND (exp, 1));
        return expand_expr (tem, target, tmode, modifier);
       }
+    case TARGET_MEM_REF:
+      {
+       struct mem_address addr;
+       int old_cse_not_expected;
+       addr_space_t as
+         = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (exp, 0))));

+       get_address_description (exp, &addr);
+       result = addr_for_mem_ref (&addr, as, true);
+       old_cse_not_expected = cse_not_expected;
+       cse_not_expected = 1;
+       result = memory_address_addr_space (tmode, result, as);
+       cse_not_expected = old_cse_not_expected;
+       return result;
+      }
     case CONST_DECL:
       /* Expand the initializer like constants above.  */
       result = XEXP (expand_expr_constant (DECL_INITIAL (exp),
@@ -9579,10 +9593,14 @@
        struct mem_address addr;
        enum insn_code icode;
        unsigned int align;
+       int old_cse_not_expected;

        get_address_description (exp, &addr);
        op0 = addr_for_mem_ref (&addr, as, true);
+       old_cse_not_expected = cse_not_expected;
+       cse_not_expected = 1;
        op0 = memory_address_addr_space (mode, op0, as);
+       cse_not_expected = old_cse_not_expected;
        temp = gen_rtx_MEM (mode, op0);
        set_mem_attributes (temp, exp, 0);
        set_mem_addr_space (temp, as);
--- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/target.def
    2013-12-21 21:16:10.926694446 -0800
+++ gcc/target.def      2013-12-09 10:46:07.794472365 -0800
@@ -1119,6 +1119,14 @@
 #undef HOOK_PREFIX
 #define HOOK_PREFIX "TARGET_"

+DEFHOOK
+(builtin_has_mem_ref_p,
+ "This hook return whether the @var{i}th param of the @var{builtin_function}\n\
+is a memory reference. If @var{i} is -1, return whether the
@var{builtin_function}\n\
+contains any memory reference type param.",
+ bool, (int builtin_function, int i),
+ default_builtin_has_mem_ref_p)
+
 /* Allow target specific overriding of option settings after options have
   been changed by an attribute or pragma or when it is reset at the
   end of the code affected by an attribute or pragma.  */
--- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/gimple.c
      2013-12-21 21:16:11.066695519 -0800
+++ gcc/gimple.c        2013-12-09 10:50:09.096289179 -0800
@@ -2574,7 +2574,8 @@
 is_gimple_addressable (tree t)
 {
   return (is_gimple_id (t) || handled_component_p (t)
-         || TREE_CODE (t) == MEM_REF);
+         || TREE_CODE (t) == MEM_REF
+         || TREE_CODE (t) == TARGET_MEM_REF);
 }

 /* Return true if T is a valid gimple constant.  */
@@ -2625,7 +2626,8 @@
       op = TREE_OPERAND (op, 0);
     }

-  if (CONSTANT_CLASS_P (op) || TREE_CODE (op) == MEM_REF)
+  if (CONSTANT_CLASS_P (op) || TREE_CODE (op) == MEM_REF
+      || TREE_CODE (op) == TARGET_MEM_REF)
     return true;

   switch (TREE_CODE (op))
--- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/targhooks.c
   2013-12-21 21:16:10.926694446 -0800
+++ gcc/targhooks.c     2013-12-09 10:45:50.154339207 -0800
@@ -543,6 +543,13 @@
 }

 bool
+default_builtin_has_mem_ref_p (int built_in_function ATTRIBUTE_UNUSED,
+                              int i ATTRIBUTE_UNUSED)
+{
+  return false;
+}
+
+bool
 hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false (
        cumulative_args_t ca ATTRIBUTE_UNUSED,
        enum machine_mode mode ATTRIBUTE_UNUSED,
--- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/tree-ssa-loop-ivopts.c
2014-01-22 11:57:10.420073494 -0800
+++ gcc/tree-ssa-loop-ivopts.c  2014-01-22 14:00:14.177908568 -0800
@@ -96,7 +96,7 @@
 /* The infinite cost.  */
 #define INFTY 10000000

-#define AVG_LOOP_NITER(LOOP) 5
+#define AVG_LOOP_NITER(LOOP) 50

 /* Returns the expected number of loop iterations for LOOP.
    The average trip count is computed from profile data if it
@@ -1785,7 +1785,8 @@

       /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
-      if (may_be_nonaddressable_p (base))
+      if (may_be_nonaddressable_p (base)
+         && REFERENCE_CLASS_P (base))
        goto fail;

       /* Moreover, on strict alignment platforms, check that it is
@@ -1793,7 +1794,11 @@
       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

-      base = build_fold_addr_expr (base);
+      /* If base is of reference class, build its addr expr here. If
+        base is already an address, then don't need to build addr
+        expr.  */
+      if (REFERENCE_CLASS_P (base))
+       base = build_fold_addr_expr (base);

       /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
@@ -1837,13 +1842,36 @@
     }
 }

+/* Find whether the Ith param of the BUILTIN is a mem
+   reference. If I is -1, it returns whether the BUILTIN
+   contains any mem reference type param.  */
+
+static bool
+builtin_has_mem_ref_p (gimple builtin, int i)
+{
+  tree fndecl = gimple_call_fndecl (builtin);
+  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+    {
+      switch (DECL_FUNCTION_CODE (fndecl))
+       {
+       case BUILT_IN_PREFETCH:
+         if (i == -1 || i == 0)
+           return true;
+       }
+    }
+  else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
+    return targetm.builtin_has_mem_ref_p ((int) DECL_FUNCTION_CODE
(fndecl), i);
+
+  return false;
+}
+
 /* Finds interesting uses of induction variables in the statement STMT.  */

 static void
 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
 {
   struct iv *iv;
-  tree op, *lhs, *rhs;
+  tree op, *lhs, *rhs, callee;
   ssa_op_iter iter;
   use_operand_p use_p;
   enum tree_code code;
@@ -1891,14 +1919,36 @@
          find_interesting_uses_cond (data, stmt);
          return;
        }
+    }
+  /* Handle builtin call which could be expanded to insn
+     with memory access operands. Generate USE_ADDRESS type
+     use for address expr which is used for memory access of
+     such builtin.  */
+  else if (is_gimple_call (stmt)
+          && (callee = gimple_call_fndecl (stmt))
+          && is_builtin_fn (callee)
+          && builtin_has_mem_ref_p (stmt, -1))
+    {
+      size_t i;
+      for (i = 0; i < gimple_call_num_args (stmt); i++)
+       {
+         if (builtin_has_mem_ref_p (stmt, i))
+           {
+             tree *arg = gimple_call_arg_ptr (stmt, i);

-      /* TODO -- we should also handle address uses of type
-
-        memory = call (whatever);
-
-        and
+             if (TREE_CODE (*arg) != SSA_NAME)
+               continue;

-        call (memory).  */
+             gcc_assert (POINTER_TYPE_P (TREE_TYPE (*arg)));
+             find_interesting_uses_address (data, stmt, arg);
+           }
+         else
+           {
+             tree arg = gimple_call_arg (stmt, i);
+             find_interesting_uses_op (data, arg);
+           }
+       }
+      return;
     }

   if (gimple_code (stmt) == GIMPLE_PHI
@@ -3845,9 +3895,12 @@
 {
   aff_tree ubase_aff, cbase_aff;
   tree expr, ub, cb;
+  unsigned HOST_WIDE_INT uoffset, coffset;

   STRIP_NOPS (ubase);
   STRIP_NOPS (cbase);
+  ubase = strip_offset (ubase, &uoffset);
+  cbase = strip_offset (cbase, &coffset);
   ub = ubase;
   cb = cbase;

@@ -6282,7 +6335,7 @@
   aff_tree aff;
   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
   tree base_hint = NULL_TREE;
-  tree ref, iv;
+  tree ref, iv, callee;
   bool ok;

   adjust_iv_update_pos (cand, use);
@@ -6305,10 +6358,41 @@
     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

   iv = var_at_stmt (data->current_loop, cand, use->stmt);
-  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
-                       reference_alias_ptr_type (*use->op_p),
-                       iv, base_hint, data->speed);
-  copy_ref_info (ref, *use->op_p);
+
+  /*  For builtin_call(addr_expr), change it to:
+       tmp = ADDR_EXPR(TMR(...));
+       call (tmp);  */
+  if (is_gimple_call (use->stmt)
+      && (callee = gimple_call_fndecl (use->stmt))
+      && is_builtin_fn (callee))
+    {
+      gimple g;
+      gimple_seq seq = NULL;
+      tree addr;
+      tree type = TREE_TYPE (TREE_TYPE (*use->op_p));
+      location_t loc = gimple_location (use->stmt);
+      gimple_stmt_iterator gsi = gsi_for_stmt (use->stmt);
+
+      ref = create_mem_ref (&bsi, type, &aff,
+                           TREE_TYPE (*use->op_p),
+                           iv, base_hint, data->speed);
+      addr = build1 (ADDR_EXPR, TREE_TYPE (*use->op_p), ref);
+      g = gimple_build_assign_with_ops (ADDR_EXPR,
+                               make_ssa_name (TREE_TYPE (*use->op_p), NULL),
+                               addr, NULL);
+      gimple_set_location (g, loc);
+      gimple_seq_add_stmt_without_update (&seq, g);
+      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
+
+      ref = gimple_assign_lhs (g);
+    }
+  else
+    {
+      ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
+                           reference_alias_ptr_type (*use->op_p),
+                           iv, base_hint, data->speed);
+      copy_ref_info (ref, *use->op_p);
+    }
   *use->op_p = ref;
 }

--- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/config/i386/i386.c
    2013-12-21 21:15:57.326590210 -0800
+++ gcc/config/i386/i386.c      2013-12-09 10:46:07.774472216 -0800
@@ -29204,6 +29204,50 @@
     }
 }

+/* Return whether the Ith param of the BUILTIN_FUNCTION
+   is a memory reference. If I == -1, return whether the
+   BUILTIN_FUNCTION contains any memory reference param.  */
+
+static bool
+ix86_builtin_has_mem_ref_p (int builtin_function, int i)
+{
+  switch ((enum ix86_builtins) builtin_function)
+    {
+    /* LOAD.  */
+    case IX86_BUILTIN_LOADHPS:
+    case IX86_BUILTIN_LOADLPS:
+    case IX86_BUILTIN_LOADHPD:
+    case IX86_BUILTIN_LOADLPD:
+      if (i == -1 || i == 1)
+       return true;
+      break;
+    case IX86_BUILTIN_LOADUPS:
+    case IX86_BUILTIN_LOADUPD:
+    case IX86_BUILTIN_LOADDQU:
+    case IX86_BUILTIN_LOADUPD256:
+    case IX86_BUILTIN_LOADUPS256:
+    case IX86_BUILTIN_LOADDQU256:
+    case IX86_BUILTIN_LDDQU256:
+      if (i == -1 || i == 0)
+       return true;
+      break;
+    /* STORE.  */
+    case IX86_BUILTIN_STOREHPS:
+    case IX86_BUILTIN_STORELPS:
+    case IX86_BUILTIN_STOREUPS:
+    case IX86_BUILTIN_STOREUPD:
+    case IX86_BUILTIN_STOREDQU:
+    case IX86_BUILTIN_STOREUPD256:
+    case IX86_BUILTIN_STOREUPS256:
+    case IX86_BUILTIN_STOREDQU256:
+      if (i == -1 || i == 0)
+       return true;
+    default:
+      break;
+    }
+  return false;
+}
+
 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
    to return a pointer to VERSION_DECL if the outcome of the expression
    formed by PREDICATE_CHAIN is true.  This function will be called during
@@ -32080,7 +32124,13 @@
          if (i == memory)
            {
              /* This must be the memory operand.  */
-             op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
+
+             /* We expect the builtin could be expanded to rtl with memory
+                operand and proper addressing mode will be kept as specified
+                in TARGET_MEM_REF.  */
+             if (!(TREE_CODE (arg) == ADDR_EXPR
+                   && TREE_CODE (TREE_OPERAND (arg, 0)) == TARGET_MEM_REF))
+               op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
              op = gen_rtx_MEM (mode, op);
              gcc_assert (GET_MODE (op) == mode
                          || GET_MODE (op) == VOIDmode);
@@ -43035,6 +43085,9 @@
 #undef TARGET_ASM_FUNCTION_PROLOGUE
 #define TARGET_ASM_FUNCTION_PROLOGUE ix86_output_function_prologue

+#undef TARGET_BUILTIN_HAS_MEM_REF_P
+#define TARGET_BUILTIN_HAS_MEM_REF_P ix86_builtin_has_mem_ref_p
+
 #undef TARGET_ASM_FUNCTION_EPILOGUE
 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

--- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/doc/tm.texi.in
2013-12-21 21:08:29.623160737 -0800
+++ gcc/doc/tm.texi.in  2013-12-09 10:45:50.124338984 -0800
@@ -696,6 +696,8 @@

 @hook TARGET_CHECK_STRING_OBJECT_FORMAT_ARG

+@hook TARGET_BUILTIN_HAS_MEM_REF_P
+
 @hook TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
 This target function is similar to the hook @code{TARGET_OPTION_OVERRIDE}
 but is called when the optimize level is changed via an attribute or
--- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/doc/tm.texi
   2013-12-21 21:08:29.613160659 -0800
+++ gcc/doc/tm.texi     2013-12-09 10:45:50.144339134 -0800
@@ -708,6 +708,12 @@
 If a target implements string objects then this hook should should
provide a facility to check the function arguments in @var{args_list}
against the format specifiers in @var{format_arg} where the type of
@var{format_arg} is one recognized as a valid string reference type.
 @end deftypefn

+@deftypefn {Target Hook} bool TARGET_BUILTIN_HAS_MEM_REF_P (int
@var{builtin_function}, int @var{i})
+This hook return whether the @var{i}th param of the @var{builtin_function}
+is a memory reference. If @var{i} is -1, return whether the
@var{builtin_function}
+contains any memory reference type param.
+@end deftypefn
+
 @deftypefn {Target Hook} void TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE (void)
 This target function is similar to the hook @code{TARGET_OPTION_OVERRIDE}
 but is called when the optimize level is changed via an attribute or
--- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c
      2013-12-21 21:12:54.275187550 -0800
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c        2014-01-22
11:12:44.108736315 -0800
@@ -20,5 +20,5 @@
   return s;
 }

-/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */
+/* { dg-final { scan-tree-dump-times "Replacing" 2 "ivopts"} } */
 /* { dg-final { cleanup-tree-dump "ivopts" } } */
--- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c
      2013-12-21 21:12:54.305187781 -0800
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c        2014-01-21
23:01:16.220074850 -0800
@@ -21,5 +21,5 @@
   return s;
 }

-/* { dg-final { scan-tree-dump-times "Replacing" 0 "ivopts"} } */
+/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */
 /* { dg-final { cleanup-tree-dump "ivopts" } } */
--- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/testsuite/gcc.dg/tree-ssa/ivopt_5.c
   2014-01-22 14:03:29.569415050 -0800
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_5.c     2014-01-22
11:05:43.715445996 -0800
@@ -0,0 +1,21 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
+
+/* Make sure only one iv is selected after IVOPT.  */
+
+#include <x86intrin.h>
+extern __m128i arr[], d[];
+void test (void)
+{
+    unsigned int b;
+    for (b = 0; b < 1000; b += 2) {
+      __m128i *p = (__m128i *)(&d[b]);
+      __m128i a = _mm_load_si128(&arr[4*b+3]);
+      __m128i v = _mm_loadu_si128(p);
+      v = _mm_xor_si128(v, a);
+      _mm_storeu_si128(p, v);
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
--- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/testsuite/gcc.dg/tree-ssa/loop-28.c
   2013-12-21 21:12:54.305187781 -0800
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-28.c     2014-01-22
11:11:31.388167897 -0800
@@ -14,7 +14,13 @@
 /* There should be 64 MEMs in the unrolled loop and one more in the
copy of the loop
    for the rest of the iterations.  */

-/* { dg-final { scan-tree-dump-times "MEM" 65 "optimized" } } */
+/* We generate prefetch(addr) as:
+   tmp = &MEM(addr);
+   prefetch(tmp);
+   to let ivopt understand the memory access mode better, so there is
another MEM in
+   unrolled loop.  */
+
+/* { dg-final { scan-tree-dump-times "MEM" 66 "optimized" } } */

 /* There should be no i_a = i_b assignments.  */
 /* { dg-final { scan-tree-dump-times "i_.*= i_\[0-9\]*;" 0 "aprefetch" } } */



More information about the Gcc-patches mailing list