This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] handle bzero/bcopy in DSE and aliasing (PR 80933, 80934)


On 06/07/2017 02:12 PM, Martin Sebor wrote:
On 06/07/2017 02:01 PM, Marc Glisse wrote:
On Wed, 7 Jun 2017, Bernhard Reutner-Fischer wrote:

On 7 June 2017 16:46:53 CEST, Martin Sebor <msebor@gmail.com> wrote:
On 06/07/2017 02:23 AM, Richard Biener wrote:
On Wed, Jun 7, 2017 at 5:26 AM, Martin Sebor <msebor@gmail.com>
wrote:
Note I'd be _much_ more sympathetic to simply canonicalizing all of
bzero and bcopy
to memset / memmove and be done with all the above complexity.


Attached is an updated patch along these lines.  Please let me
know if it matches your expectations.

I think you attached the wrong patch.

Yes I did, sorry.  The correct one is attached.

Under POSIX.1-2008 "optimizing" bzero or bcmp is IMO plain wrong.

It's like optimizing foo() to a random built-in but maybe that's just
me. If your libc provides a define to a standard function for these
under a compat knob then fine but otherwise you should fix that.
*shrug*. Joseph?

The patch optimizes __builtin_bzero, which should be ok. The question
(independent from this patch) is then under what conditions bzero should
be detected as a builtin.

Yes.  The problem is that unlike for C and C++, GCC doesn't have
a mechanism to select the target version of POSIX.  I think it
should.

But there is a subtle problem with the patch that needs fixing.
Bcopy should not be transformed to memcpy but rather memmove.
I'll fix that before committing.

Attached is an updated patch with this fix.  I also added a cast
from bcopy and bzero to void to detect accidental uses of the
return value.  Tested on x86_64-linux.

Martin
PR tree-optimization/80934 - bzero should be assumed not to escape pointer argument
PR tree-optimization/80933 - redundant bzero/bcopy calls not eliminated

gcc/ChangeLog:

	PR tree-optimization/80933
	PR tree-optimization/80934
	* builtins.c (fold_builtin_bcmp, fold_builtin_bcopy): New functions.
	(fold_builtin_bzero): Likewise.
	(fold_builtin_2): Handle bzero.
	(fold_builtin_3): Handle bcmp and bcpy.

gcc/testsuite/ChangeLog:

	PR tree-optimization/80933
	PR tree-optimization/80934
	* gcc.dg/fold-bcopy.c: New test.
	* gcc.dg/tree-ssa/ssa-dse-30.c: Likewise..
	* gcc.dg/tree-ssa/alias-36.c: Likewise.
	* gcc/testsuite/gcc.dg/pr79214.c: Adjust.
	* gcc.dg/tree-prof/val-prof-7.c: Likewise.
	* gcc.dg/Wsizeof-pointer-memaccess1.c: Likewise.
	* gcc.dg/builtins-nonnull.c: Likewise.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 30462ad..52d42b9 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -145,6 +145,9 @@ static rtx expand_builtin_unop (machine_mode, tree, rtx, rtx, optab);
 static rtx expand_builtin_frame_address (tree, tree);
 static tree stabilize_va_list_loc (location_t, tree, int);
 static rtx expand_builtin_expect (tree, rtx);
+static tree fold_builtin_bcmp (location_t, tree, tree, tree);
+static tree fold_builtin_bcopy (location_t, tree, tree, tree);
+static tree fold_builtin_bzero (location_t, tree, tree);
 static tree fold_builtin_constant_p (tree);
 static tree fold_builtin_classify_type (tree);
 static tree fold_builtin_strlen (location_t, tree, tree);
@@ -7982,6 +7985,56 @@ fold_builtin_sincos (location_t loc,
 			 fold_build1_loc (loc, REALPART_EXPR, type, call)));
 }
 
+/* Fold function call to built-in bzero with arguments SRC and LEN
+   into a call to built-in memset (DST, 0, LEN).  */
+
+static tree
+fold_builtin_bzero (location_t loc, tree dst, tree len)
+{
+  if (!validate_arg (dst, POINTER_TYPE)
+      || !validate_arg (len, INTEGER_TYPE))
+    return NULL_TREE;
+
+  tree fn = builtin_decl_implicit (BUILT_IN_MEMSET);
+  /* Call memset and return the result cast to void to detect its use
+     (bzero returns void).  */
+  tree call = build_call_expr_loc (loc, fn, 3, dst, integer_zero_node, len);
+  return fold_convert (void_type_node, call);
+}
+
+/* Fold function call to built-in bcmp with arguments ARG1, ARG2, and LEN
+   into a call to built-in memcmp(ARG1, ARG2, LEN).  */
+
+static tree
+fold_builtin_bcmp (location_t loc, tree arg1, tree arg2, tree len)
+{
+  if (!validate_arg (arg1, POINTER_TYPE)
+      || !validate_arg (arg2, POINTER_TYPE)
+      || !validate_arg (len, INTEGER_TYPE))
+    return NULL_TREE;
+
+  tree fn = builtin_decl_implicit (BUILT_IN_MEMCMP);
+  return build_call_expr_loc (loc, fn, 3, arg1, arg2, len);
+}
+
+/* Fold function call to built-in bcopy with arguments SRC, DST, and LEN
+   into a call to built-in memmove(DST, SRC, LEN).  */
+
+static tree
+fold_builtin_bcopy (location_t loc, tree src, tree dst, tree len)
+{
+  if (!validate_arg (src, POINTER_TYPE)
+      || !validate_arg (dst, POINTER_TYPE)
+      || !validate_arg (len, INTEGER_TYPE))
+    return NULL_TREE;
+
+  /* bcopy has been removed from POSIX in Issue 7 but Issue 6 specifies
+     it's quivalent to memmove (not memcpy).  Call memmove and return
+     the result cast to void to detect its use (bcopy returns void).  */
+  tree fn = builtin_decl_implicit (BUILT_IN_MEMMOVE);
+  return build_call_expr_loc (loc, fn, 3, dst, src, len);
+}
+
 /* Fold function call to builtin memcmp with arguments ARG1 and ARG2.
    Return NULL_TREE if no simplification can be made.  */
 
@@ -8947,6 +9000,9 @@ fold_builtin_2 (location_t loc, tree fndecl, tree arg0, tree arg1)
     CASE_FLT_FN (BUILT_IN_MODF):
       return fold_builtin_modf (loc, arg0, arg1, type);
 
+    case BUILT_IN_BZERO:
+      return fold_builtin_bzero (loc, arg0, arg1);
+
     case BUILT_IN_STRSPN:
       return fold_builtin_strspn (loc, arg0, arg1);
 
@@ -9034,7 +9090,12 @@ fold_builtin_3 (location_t loc, tree fndecl,
 	return do_mpfr_remquo (arg0, arg1, arg2);
     break;
 
+    case BUILT_IN_BCOPY:
+      return fold_builtin_bcopy (loc, arg0, arg1, arg2);;
+
     case BUILT_IN_BCMP:
+      return fold_builtin_bcmp (loc, arg0, arg1, arg2);;
+
     case BUILT_IN_MEMCMP:
       return fold_builtin_memcmp (loc, arg0, arg1, arg2);;
 
diff --git a/gcc/testsuite/gcc.dg/fold-bcopy.c b/gcc/testsuite/gcc.dg/fold-bcopy.c
new file mode 100644
index 0000000..ce5b317
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-bcopy.c
@@ -0,0 +1,43 @@
+/* PR tree-optimization/80933 - redundant bzero/bcopy calls not eliminated
+   { dg-do compile }
+   { dg-options "-O0 -Wall -fdump-tree-gimple" } */
+
+void f0 (void *p, const void *q, unsigned n)
+{
+  /* Bcopy corresponds to memmmove, not memcpy.  */
+  __builtin_bcopy (q, p, n);
+}
+
+void f1 (void *p, const void *q, unsigned n)
+{
+  /* This call should be eliminated.  */
+  __builtin_bcopy (q, p, 0);
+}
+
+int f2 (const void *p, const void *q, unsigned n)
+{
+  return __builtin_bcmp (q, p, n);
+}
+
+int f3 (const void *p, const void *q, unsigned n)
+{
+  /* This call should be folded into 0.  */
+  return __builtin_bcmp (q, p, 0);
+}
+
+void f4 (void *p, unsigned n)
+{
+  __builtin_bzero (p, n);
+}
+
+void f5 (void *p)
+{
+  /* This call should be eliminated.  */
+  __builtin_bzero (p, 0);
+}
+
+/* Verify that calls to bcmp, bcopy, and bzero have all been removed
+   and one of each replaced with memcmp, memmove, and memset, respectively.
+   The remaining three should be eliminated.
+  { dg-final { scan-tree-dump-not "bcmp|bcopy|bzero" "gimple" } }
+  { dg-final { scan-tree-dump-times "memcmp|memmove|memset" 3 "gimple" } } */
diff --git a/gcc/testsuite/gcc.dg/pr79214.c b/gcc/testsuite/gcc.dg/pr79214.c
index 79d2a25..6cf254fb 100644
--- a/gcc/testsuite/gcc.dg/pr79214.c
+++ b/gcc/testsuite/gcc.dg/pr79214.c
@@ -22,7 +22,7 @@ size_t range (void)
 
 void test_bzero (void)
 {
-  bzero (d, range ());   /* { dg-warning ".__builtin_bzero. writing 4 or more bytes into a region of size 3 overflows the destination" } */
+  bzero (d, range ());   /* { dg-warning ".__builtin_(bzero|memset). writing 4 or more bytes into a region of size 3 overflows the destination" } */
 }
 
 void test_memcpy (void)
diff --git a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c
index 84ec9fb..5a4e777 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c
@@ -4,14 +4,10 @@
 char *buffer1;
 char *buffer2;
 
+/* Bzero is not tested because it gets transformed into memset.  */
+
 #define DEFINE_TEST(N) \
 __attribute__((noinline)) \
-void bzero_test_ ## N (int len) \
-{ \
-  __builtin_bzero (buffer1, len); \
-} \
- \
-__attribute__((noinline)) \
 void memcpy_test_ ## N (int len) \
 { \
   __builtin_memcpy (buffer1, buffer2, len); \
@@ -31,7 +27,6 @@ void memset_test_ ## N (int len) \
  \
 void test_stringops_ ## N(int len) \
 { \
-  bzero_test_ ## N (len); \
   memcpy_test_## N (len); \
   mempcpy_test_ ## N (len); \
   memset_test_ ## N (len); \
@@ -64,10 +59,6 @@ int main() {
   return 0;
 }
 
-/* { dg-final-use-not-autofdo { scan-ipa-dump "Single value 8 stringop transformation on __builtin_bzero" "profile" } } */
-/* { dg-final-use-not-autofdo { scan-ipa-dump "Single value 55 stringop transformation on __builtin_bzero" "profile" } } */
-/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Single value 32 stringop transformation on __builtin_bzero" 0 "profile" } } */
-
 /* { dg-final-use-not-autofdo { scan-ipa-dump "Single value 8 stringop transformation on __builtin_memcpy" "profile" } } */
 /* { dg-final-use-not-autofdo { scan-ipa-dump "Single value 55 stringop transformation on __builtin_memcpy" "profile" } } */
 /* { dg-final-use-not-autofdo { scan-ipa-dump-times "Single value 32 stringop transformation on __builtin_memcpy" 0 "profile" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/alias-36.c b/gcc/testsuite/gcc.dg/tree-ssa/alias-36.c
new file mode 100644
index 0000000..61b601a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/alias-36.c
@@ -0,0 +1,28 @@
+/* PR tree-optimization/80934 - bzero should be assumed not to escape
+   pointer argument
+   { dg-do compile }
+   { dg-options "-O2 -fdump-tree-alias" } */
+
+void foobar (void);
+
+void f (void);
+
+void g (void)
+{
+  char d[32];
+  __builtin_memset (d, 0, sizeof d);
+  f ();
+  if (*d != 0)
+    foobar ();
+}
+
+void h (void)
+{
+  char d[32];
+  __builtin_bzero (d, sizeof d);
+  f ();
+  if (*d != 0)
+    foobar ();
+}
+
+/* { dg-final { scan-tree-dump-not "memset|foobar|bzero" "alias" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-30.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-30.c
new file mode 100644
index 0000000..ece8cb2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-30.c
@@ -0,0 +1,31 @@
+/* PR tree-optimization/80933 - redundant bzero/bcopy calls not eliminated
+   { dg-do compile }
+   { dg-options "-O2 -fdump-tree-dse1" } */
+
+void sink (void*);
+
+void test_bcopy (const void *s)
+{
+  char d[33];
+
+  /* Bcopy is transformed into memcpy and those calls are expanded
+     inline in EVRP, before DSE runs, so this test doesn't actually
+     verify that DSE does its job.  */
+  __builtin_bcopy (s, d, sizeof d);
+  __builtin_bcopy (s, d, sizeof d);
+
+  sink (d);
+}
+
+void test_bzero (void)
+{
+  char d[33];
+
+  __builtin_bzero (d, sizeof d);
+  __builtin_bzero (d, sizeof d);
+
+  sink (d);
+}
+
+/* { dg-final { scan-tree-dump-times "builtin_memset" 1 "dse1" } } */
+/* { dg-final { scan-tree-dump-not "builtin_(bcopy|bzero|memcpy)" "dse1" } } */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]