[PATCH v2 3/9] Introduce can_vector_compare_p function

Ilya Leoshkevich iii@linux.ibm.com
Mon Aug 26 12:18:00 GMT 2019


> Am 26.08.2019 um 10:49 schrieb Richard Biener <richard.guenther@gmail.com>:
> 
> On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>> 
>>> Am 23.08.2019 um 13:24 schrieb Richard Biener <richard.guenther@gmail.com>:
>>> 
>>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford
>>> <richard.sandiford@arm.com> wrote:
>>>> 
>>>> Ilya Leoshkevich <iii@linux.ibm.com> writes:
>>>>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode,
>>>>>  return 0;
>>>>> }
>>>>> 
>>>>> +/* can_vector_compare_p presents fake rtx binary operations to the the back-end
>>>>> +   in order to determine its capabilities.  In order to avoid creating fake
>>>>> +   operations on each call, values from previous calls are cached in a global
>>>>> +   cached_binops hash_table.  It contains rtxes, which can be looked up using
>>>>> +   binop_keys.  */
>>>>> +
>>>>> +struct binop_key {
>>>>> +  enum rtx_code code;        /* Operation code.  */
>>>>> +  machine_mode value_mode;   /* Result mode.     */
>>>>> +  machine_mode cmp_op_mode;  /* Operand mode.    */
>>>>> +};
>>>>> +
>>>>> +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> {
>>>>> +  typedef rtx value_type;
>>>>> +  typedef binop_key compare_type;
>>>>> +
>>>>> +  static hashval_t
>>>>> +  hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode)
>>>>> +  {
>>>>> +    inchash::hash hstate (0);
>>>>> +    hstate.add_int (code);
>>>>> +    hstate.add_int (value_mode);
>>>>> +    hstate.add_int (cmp_op_mode);
>>>>> +    return hstate.end ();
>>>>> +  }
>>>>> +
>>>>> +  static hashval_t
>>>>> +  hash (const rtx &ref)
>>>>> +  {
>>>>> +    return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0)));
>>>>> +  }
>>>>> +
>>>>> +  static bool
>>>>> +  equal (const rtx &ref1, const binop_key &ref2)
>>>>> +  {
>>>>> +    return (GET_CODE (ref1) == ref2.code)
>>>>> +        && (GET_MODE (ref1) == ref2.value_mode)
>>>>> +        && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode);
>>>>> +  }
>>>>> +};
>>>>> +
>>>>> +static GTY ((cache)) hash_table<binop_hasher> *cached_binops;
>>>>> +
>>>>> +static rtx
>>>>> +get_cached_binop (enum rtx_code code, machine_mode value_mode,
>>>>> +               machine_mode cmp_op_mode)
>>>>> +{
>>>>> +  if (!cached_binops)
>>>>> +    cached_binops = hash_table<binop_hasher>::create_ggc (1024);
>>>>> +  binop_key key = { code, value_mode, cmp_op_mode };
>>>>> +  hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode);
>>>>> +  rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT);
>>>>> +  if (!*slot)
>>>>> +    *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode),
>>>>> +                         gen_reg_rtx (cmp_op_mode));
>>>>> +  return *slot;
>>>>> +}
>>>> 
>>>> Sorry, I didn't mean anything this complicated.  I just meant that
>>>> we should have a single cached rtx that we can change via PUT_CODE and
>>>> PUT_MODE_RAW for each new query, rather than allocating a new rtx each
>>>> time.
>>>> 
>>>> Something like:
>>>> 
>>>> static GTY ((cache)) rtx cached_binop;
>>>> 
>>>> rtx
>>>> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode)
>>>> {
>>>> if (cached_binop)
>>>>   {
>>>>     PUT_CODE (cached_binop, code);
>>>>     PUT_MODE_RAW (cached_binop, mode);
>>>>     PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode);
>>>>     PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode);
>>>>   }
>>>> else
>>>>   {
>>>>     rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
>>>>     rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2);
>>>>     cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2);
>>>>   }
>>>> return cached_binop;
>>>> }
>>> 
>>> Hmm, maybe we need  auto_rtx (code) that constructs such
>>> RTX on the stack instead of wasting a GC root (and causing
>>> issues for future threading of GCC ;)).
>> 
>> Do you mean something like this?
>> 
>> union {
>>  char raw[rtx_code_size[code]];
>>  rtx rtx;
>> } binop;
>> 
>> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show
>> anything useful), or should I implement this?
> 
> It doesn't exist AFAIK, I thought about using alloca like
> 
> rtx tem;
> rtx_alloca (tem, PLUS);
> 
> and due to using alloca rtx_alloca has to be a macro like
> 
> #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code));
> memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code);
> 
> maybe C++ can help making this prettier but of course
> since we use alloca we have to avoid opening new scopes.
> 
> I guess templates like with auto_vec doesn't work unless
> we can make RTX_CODE_SIZE constant-evaluated.
> 
> Richard.

I ended up with the following change:

diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index a667cdab94e..97aa2144e95 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -466,17 +466,25 @@ set_mode_and_regno (rtx x, machine_mode mode, unsigned int regno)
   set_regno_raw (x, regno, nregs);
 }
 
-/* Generate a new REG rtx.  Make sure ORIGINAL_REGNO is set properly, and
+/* Initialize a REG rtx.  Make sure ORIGINAL_REGNO is set properly, and
    don't attempt to share with the various global pieces of rtl (such as
    frame_pointer_rtx).  */
 
-rtx
-gen_raw_REG (machine_mode mode, unsigned int regno)
+void
+init_raw_REG (rtx x, machine_mode mode, unsigned int regno)
 {
-  rtx x = rtx_alloc (REG MEM_STAT_INFO);
   set_mode_and_regno (x, mode, regno);
   REG_ATTRS (x) = NULL;
   ORIGINAL_REGNO (x) = regno;
+}
+
+/* Generate a new REG rtx.  */
+
+rtx
+gen_raw_REG (machine_mode mode, unsigned int regno)
+{
+  rtx x = rtx_alloc (REG MEM_STAT_INFO);
+  init_raw_REG (x, mode, regno);
   return x;
 }
 
diff --git a/gcc/gengenrtl.c b/gcc/gengenrtl.c
index 5c78fabfb50..bb2087da258 100644
--- a/gcc/gengenrtl.c
+++ b/gcc/gengenrtl.c
@@ -231,8 +231,7 @@ genmacro (int idx)
   puts (")");
 }
 
-/* Generate the code for the function to generate RTL whose
-   format is FORMAT.  */
+/* Generate the code for functions to generate RTL whose format is FORMAT.  */
 
 static void
 gendef (const char *format)
@@ -240,22 +239,18 @@ gendef (const char *format)
   const char *p;
   int i, j;
 
-  /* Start by writing the definition of the function name and the types
+  /* Write the definition of the init function name and the types
      of the arguments.  */
 
-  printf ("static inline rtx\ngen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format);
+  puts ("static inline void");
+  printf ("init_rtx_fmt_%s (rtx rt, machine_mode mode", format);
   for (p = format, i = 0; *p != 0; p++)
     if (*p != '0')
       printf (",\n\t%sarg%d", type_from_format (*p), i++);
+  puts (")");
 
-  puts (" MEM_STAT_DECL)");
-
-  /* Now write out the body of the function itself, which allocates
-     the memory and initializes it.  */
+  /* Now write out the body of the init function itself.  */
   puts ("{");
-  puts ("  rtx rt;");
-  puts ("  rt = rtx_alloc (code PASS_MEM_STAT);\n");
-
   puts ("  PUT_MODE_RAW (rt, mode);");
 
   for (p = format, i = j = 0; *p ; ++p, ++i)
@@ -266,16 +261,56 @@ gendef (const char *format)
     else
       printf ("  %s (rt, %d) = arg%d;\n", accessor_from_format (*p), i, j++);
 
-  puts ("\n  return rt;\n}\n");
+  puts ("}\n");
+
+  /* Write the definition of the gen function name and the types
+     of the arguments.  */
+
+  puts ("static inline rtx");
+  printf ("gen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format);
+  for (p = format, i = 0; *p != 0; p++)
+    if (*p != '0')
+      printf (",\n\t%sarg%d", type_from_format (*p), i++);
+  puts (" MEM_STAT_DECL)");
+
+  /* Now write out the body of the function itself, which allocates
+     the memory and initializes it.  */
+  puts ("{");
+  puts ("  rtx rt;\n");
+
+  puts ("  rt = rtx_alloc (code PASS_MEM_STAT);");
+  printf ("  init_rtx_fmt_%s (rt, mode", format);
+  for (p = format, i = 0; *p != 0; p++)
+    if (*p != '0')
+      printf (", arg%d", i++);
+  puts (");\n");
+
+  puts ("  return rt;\n}\n");
+
+  /* Write the definition of gen macro.  */
+
   printf ("#define gen_rtx_fmt_%s(c, m", format);
   for (p = format, i = 0; *p != 0; p++)
     if (*p != '0')
-      printf (", p%i",i++);
-  printf (")\\\n        gen_rtx_fmt_%s_stat (c, m", format);
+      printf (", arg%d", i++);
+  printf (") \\\n  gen_rtx_fmt_%s_stat ((c), (m)", format);
   for (p = format, i = 0; *p != 0; p++)
     if (*p != '0')
-      printf (", p%i",i++);
+      printf (", (arg%d)", i++);
   printf (" MEM_STAT_INFO)\n\n");
+
+  /* Write the definition of alloca macro.  */
+
+  printf ("#define alloca_rtx_fmt_%s(rt, c, m", format);
+  for (p = format, i = 0; *p != 0; p++)
+    if (*p != '0')
+      printf (", arg%d", i++);
+  printf (") \\\n  rtx_alloca ((rt), (c)); \\\n");
+  printf ("  init_rtx_fmt_%s ((rt), (m)", format);
+  for (p = format, i = 0; *p != 0; p++)
+    if (*p != '0')
+      printf (", (arg%d)", i++);
+  printf (")\n\n");
 }
 
 /* Generate the documentation header for files we write.  */
diff --git a/gcc/rtl.h b/gcc/rtl.h
index efb9b3ce40d..44733d8a39e 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -2933,6 +2933,10 @@ extern HOST_WIDE_INT get_stack_check_protect (void);
 
 /* In rtl.c */
 extern rtx rtx_alloc (RTX_CODE CXX_MEM_STAT_INFO);
+#define rtx_alloca(rt, code) \
+  (rt) = (rtx) alloca (RTX_CODE_SIZE ((code))); \
+  memset ((rt), 0, RTX_HDR_SIZE); \
+  PUT_CODE ((rt), (code));
 extern rtx rtx_alloc_stat_v (RTX_CODE MEM_STAT_DECL, int);
 #define rtx_alloc_v(c, SZ) rtx_alloc_stat_v (c MEM_STAT_INFO, SZ)
 #define const_wide_int_alloc(NWORDS)				\
@@ -3797,7 +3801,11 @@ gen_rtx_INSN (machine_mode mode, rtx_insn *prev_insn, rtx_insn *next_insn,
 extern rtx gen_rtx_CONST_INT (machine_mode, HOST_WIDE_INT);
 extern rtx gen_rtx_CONST_VECTOR (machine_mode, rtvec);
 extern void set_mode_and_regno (rtx, machine_mode, unsigned int);
+extern void init_raw_REG (rtx, machine_mode, unsigned int);
 extern rtx gen_raw_REG (machine_mode, unsigned int);
+#define alloca_raw_REG(rt, mode, regno) \
+  rtx_alloca ((rt), REG); \
+  init_raw_REG ((rt), (mode), (regno))
 extern rtx gen_rtx_REG (machine_mode, unsigned int);
 extern rtx gen_rtx_SUBREG (machine_mode, rtx, poly_uint64);
 extern rtx gen_rtx_MEM (machine_mode, rtx);

which now allows me to write:

rtx reg1, reg2, test;
alloca_raw_REG (reg1, cmp_op_mode, LAST_VIRTUAL_REGISTER + 1);
alloca_raw_REG (reg2, cmp_op_mode, LAST_VIRTUAL_REGISTER + 2);
alloca_rtx_fmt_ee (test, code, value_mode, reg1, reg2);

If that looks ok, I'll resend the series.



More information about the Gcc-patches mailing list