[PATCH] Avoid introducing UB in match.pd (T)(P+A)-(T)(P+B) and similar optimizations (PR sanitizer/81281)

Jakub Jelinek jakub@redhat.com
Tue Dec 5 13:44:00 GMT 2017


Hi!

As mentioned in the PR, while (T)(P + A) - (T)P -> (T) A
shouldn't introduce UB when it wasn't there earlier,
(T)P - (T)(P + A) -> -(T) A
and
(T)(P + A) - (T)(P + B) -> (T)A - (T)B
can, so if the conversion to T isn't widening and T has undefined overflow,
then we need to perform the negation or minus in unsigned_type_for instead
of T.
Another thing is that while pointer_plus isn't commutative, plus is, so
we should use :c in that case.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-12-05  Jakub Jelinek  <jakub@redhat.com>

	PR sanitizer/81281
	* match.pd ((T)(P + A) - (T)P -> (T) A): Split into separate
	simplify for plus with :c added, and pointer_plus without that.
	((T)P - (T)(P + A) -> -(T) A): Likewise.  If type is integral
	with undefined overflow and the conversion is not widening,
	perform negation in utype and only convert to type afterwards.
	((T)(P + A) - (T)(P + B) -> (T)A - (T)B): Split into separate
	simplify for plus with :c added, and pointer_plus without that.
	If type is integral with undefined overflow and the conversion is
	not widening, perform minus in utype and only convert to type
	afterwards.  Move the last pointer_diff_expr simplify into the
	two outermost ifs.

	* gcc.c-torture/execute/pr81281.c: New test.
	* gcc.dg/pr81281-1.c: New test.
	* gcc.dg/pr81281-2.c: New test.
	* g++.dg/ubsan/pr81281.C: New test.
	* g++.dg/ubsan/pr81281-aux.cc: New test.

--- gcc/match.pd.jj	2017-11-28 09:40:08.000000000 +0100
+++ gcc/match.pd	2017-12-05 11:36:58.855074420 +0100
@@ -1783,28 +1783,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (bit_not @0))
 
   /* (T)(P + A) - (T)P -> (T) A */
-  (for add (plus pointer_plus)
-   (simplify
-    (minus (convert (add @@0 @1))
-     (convert @0))
-    (if (element_precision (type) <= element_precision (TREE_TYPE (@1))
-	 /* For integer types, if A has a smaller type
-	    than T the result depends on the possible
-	    overflow in P + A.
-	    E.g. T=size_t, A=(unsigned)429497295, P>0.
-	    However, if an overflow in P + A would cause
-	    undefined behavior, we can assume that there
-	    is no overflow.  */
-	 || (INTEGRAL_TYPE_P (TREE_TYPE (@0))
-	     && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0)))
-	 /* For pointer types, if the conversion of A to the
-	    final type requires a sign- or zero-extension,
-	    then we have to punt - it is not defined which
-	    one is correct.  */
-	 || (POINTER_TYPE_P (TREE_TYPE (@0))
-	     && TREE_CODE (@1) == INTEGER_CST
-	     && tree_int_cst_sign_bit (@1) == 0))
-     (convert @1))))
+  (simplify
+   (minus (convert (plus:c @@0 @1))
+    (convert @0))
+   (if (element_precision (type) <= element_precision (TREE_TYPE (@1))
+	/* For integer types, if A has a smaller type
+	   than T the result depends on the possible
+	   overflow in P + A.
+	   E.g. T=size_t, A=(unsigned)429497295, P>0.
+	   However, if an overflow in P + A would cause
+	   undefined behavior, we can assume that there
+	   is no overflow.  */
+	|| (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+	    && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))))
+    (convert @1)))
+  (simplify
+   (minus (convert (pointer_plus @@0 @1))
+    (convert @0))
+   (if (element_precision (type) <= element_precision (TREE_TYPE (@1))
+	/* For pointer types, if the conversion of A to the
+	   final type requires a sign- or zero-extension,
+	   then we have to punt - it is not defined which
+	   one is correct.  */
+	|| (POINTER_TYPE_P (TREE_TYPE (@0))
+	    && TREE_CODE (@1) == INTEGER_CST
+	    && tree_int_cst_sign_bit (@1) == 0))
+    (convert @1)))
    (simplify
     (pointer_diff (pointer_plus @@0 @1) @0)
     /* The second argument of pointer_plus must be interpreted as signed, and
@@ -1813,10 +1817,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      (convert (convert:stype @1))))
 
   /* (T)P - (T)(P + A) -> -(T) A */
-  (for add (plus pointer_plus)
-   (simplify
-    (minus (convert @0)
-     (convert (add @@0 @1)))
+  (simplify
+   (minus (convert @0)
+    (convert (plus:c @@0 @1)))
+   (if (INTEGRAL_TYPE_P (type)
+	&& TYPE_OVERFLOW_UNDEFINED (type)
+	&& element_precision (type) <= element_precision (TREE_TYPE (@1)))
+    (with { tree utype = unsigned_type_for (type); }
+     (convert (negate (convert:utype @1))))
     (if (element_precision (type) <= element_precision (TREE_TYPE (@1))
 	 /* For integer types, if A has a smaller type
 	    than T the result depends on the possible
@@ -1826,7 +1834,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	    undefined behavior, we can assume that there
 	    is no overflow.  */
 	 || (INTEGRAL_TYPE_P (TREE_TYPE (@0))
-	     && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0)))
+	     && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))))
+     (negate (convert @1)))))
+  (simplify
+   (minus (convert @0)
+    (convert (pointer_plus @@0 @1)))
+   (if (INTEGRAL_TYPE_P (type)
+	&& TYPE_OVERFLOW_UNDEFINED (type)
+	&& element_precision (type) <= element_precision (TREE_TYPE (@1)))
+    (with { tree utype = unsigned_type_for (type); }
+     (convert (negate (convert:utype @1))))
+    (if (element_precision (type) <= element_precision (TREE_TYPE (@1))
 	 /* For pointer types, if the conversion of A to the
 	    final type requires a sign- or zero-extension,
 	    then we have to punt - it is not defined which
@@ -1843,10 +1861,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      (negate (convert (convert:stype @1)))))
 
   /* (T)(P + A) - (T)(P + B) -> (T)A - (T)B */
-  (for add (plus pointer_plus)
-   (simplify
-    (minus (convert (add @@0 @1))
-     (convert (add @0 @2)))
+  (simplify
+   (minus (convert (plus:c @@0 @1))
+    (convert (plus:c @0 @2)))
+   (if (INTEGRAL_TYPE_P (type)
+	&& TYPE_OVERFLOW_UNDEFINED (type)
+	&& element_precision (type) <= element_precision (TREE_TYPE (@1)))
+    (with { tree utype = unsigned_type_for (type); }
+     (convert (minus (convert:utype @1) (convert:utype @2))))
     (if (element_precision (type) <= element_precision (TREE_TYPE (@1))
 	 /* For integer types, if A has a smaller type
 	    than T the result depends on the possible
@@ -1856,7 +1878,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	    undefined behavior, we can assume that there
 	    is no overflow.  */
 	 || (INTEGRAL_TYPE_P (TREE_TYPE (@0))
-	     && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0)))
+	     && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))))
+     (minus (convert @1) (convert @2)))))
+  (simplify
+   (minus (convert (pointer_plus @@0 @1))
+    (convert (pointer_plus @0 @2)))
+   (if (INTEGRAL_TYPE_P (type)
+	&& TYPE_OVERFLOW_UNDEFINED (type)
+	&& element_precision (type) <= element_precision (TREE_TYPE (@1)))
+    (with { tree utype = unsigned_type_for (type); }
+     (convert (minus (convert:utype @1) (convert:utype @2))))
+    (if (element_precision (type) <= element_precision (TREE_TYPE (@1))
 	 /* For pointer types, if the conversion of A to the
 	    final type requires a sign- or zero-extension,
 	    then we have to punt - it is not defined which
@@ -1866,13 +1898,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	     && tree_int_cst_sign_bit (@1) == 0
 	     && TREE_CODE (@2) == INTEGER_CST
 	     && tree_int_cst_sign_bit (@2) == 0))
-     (minus (convert @1) (convert @2)))))))
+     (minus (convert @1) (convert @2)))))
    (simplify
     (pointer_diff (pointer_plus @@0 @1) (pointer_plus @0 @2))
     /* The second argument of pointer_plus must be interpreted as signed, and
        thus sign-extended if necessary.  */
     (with { tree stype = signed_type_for (TREE_TYPE (@1)); }
-     (minus (convert (convert:stype @1)) (convert (convert:stype @2)))))
+     (minus (convert (convert:stype @1)) (convert (convert:stype @2)))))))
 
 
 /* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax().  */
--- gcc/testsuite/gcc.c-torture/execute/pr81281.c.jj	2017-12-05 11:36:02.094790028 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr81281.c	2017-12-05 11:35:44.328014023 +0100
@@ -0,0 +1,33 @@
+/* PR sanitizer/81281 */
+
+void
+foo (unsigned p, unsigned a, unsigned b)
+{
+  unsigned q = p + 7;
+  if (a - (1U + __INT_MAX__) >= 2)
+    __builtin_unreachable ();
+  int d = p + b;
+  int c = p + a;
+  if (c - d != __INT_MAX__)
+    __builtin_abort ();
+}
+
+void
+bar (unsigned p, unsigned a)
+{
+  unsigned q = p + 7;
+  if (a - (1U + __INT_MAX__) >= 2)
+    __builtin_unreachable ();
+  int c = p;
+  int d = p + a;
+  if (c - d != -__INT_MAX__ - 1)
+    __builtin_abort ();
+}
+
+int
+main ()
+{
+  foo (-1U, 1U + __INT_MAX__, 1U);
+  bar (-1U, 1U + __INT_MAX__);
+  return 0;
+}
--- gcc/testsuite/gcc.dg/pr81281-1.c.jj	2017-12-05 11:59:03.070390424 +0100
+++ gcc/testsuite/gcc.dg/pr81281-1.c	2017-12-05 11:59:25.027113847 +0100
@@ -0,0 +1,150 @@
+/* PR sanitizer/81281 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "p_\[0-9]*\\(D\\)" "optimized" } } */
+
+long long
+f1 (int p, int a, int b)
+{
+  int c = p + 1;
+  int d = a + 2;
+  int e = b + 3;
+  long long f = p + a;
+  long long g = p + b;
+  return f - g;
+}
+
+long long
+f2 (int p, int a, int b)
+{
+  int c = a + 1;
+  int d = p + 2;
+  int e = b + 3;
+  long long f = p + a;
+  long long g = p + b;
+  return f - g;
+}
+
+long long
+f3 (int p, int a, int b)
+{
+  int c = b + 1;
+  int d = p + 2;
+  int e = a + 3;
+  long long f = p + a;
+  long long g = p + b;
+  return f - g;
+}
+
+signed char
+f4 (int p, int a, int b)
+{
+  int c = p + 1;
+  int d = a + 2;
+  int e = b + 3;
+  signed char f = p + a;
+  signed char g = p + b;
+  return f - g;
+}
+
+signed char
+f5 (int p, int a, int b)
+{
+  int c = a + 1;
+  int d = p + 2;
+  int e = b + 3;
+  signed char f = p + a;
+  signed char g = p + b;
+  return f - g;
+}
+
+signed char
+f6 (int p, int a, int b)
+{
+  int c = b + 1;
+  int d = p + 2;
+  int e = a + 3;
+  signed char f = p + a;
+  signed char g = p + b;
+  return f - g;
+}
+
+long long
+f7 (int p, int a)
+{
+  int c = p + 1;
+  int d = a + 2;
+  long long f = p + a;
+  long long g = p;
+  return f - g;
+}
+
+long long
+f8 (int p, int a)
+{
+  int c = a + 1;
+  int d = p + 2;
+  long long f = p + a;
+  long long g = p;
+  return f - g;
+}
+
+signed char
+f9 (int p, int a)
+{
+  int c = p + 1;
+  int d = a + 2;
+  signed char f = p + a;
+  signed char g = p;
+  return f - g;
+}
+
+signed char
+f10 (int p, int a)
+{
+  int c = a + 1;
+  int d = p + 2;
+  signed char f = p + a;
+  signed char g = p;
+  return f - g;
+}
+
+long long
+f11 (int p, int a)
+{
+  int c = p + 1;
+  int d = a + 2;
+  long long f = p;
+  long long g = p + a;
+  return f - g;
+}
+
+long long
+f12 (int p, int a)
+{
+  int c = a + 1;
+  int d = p + 2;
+  long long f = p;
+  long long g = p + a;
+  return f - g;
+}
+
+signed char
+f13 (int p, int a)
+{
+  int c = p + 1;
+  int d = a + 2;
+  signed char f = p;
+  signed char g = p + a;
+  return f - g;
+}
+
+signed char
+f14 (int p, int a)
+{
+  int c = a + 1;
+  int d = p + 2;
+  signed char f = p;
+  signed char g = p + a;
+  return f - g;
+}
--- gcc/testsuite/gcc.dg/pr81281-2.c.jj	2017-12-05 12:00:28.586313227 +0100
+++ gcc/testsuite/gcc.dg/pr81281-2.c	2017-12-05 12:15:17.000000000 +0100
@@ -0,0 +1,80 @@
+/* PR sanitizer/81281 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "p_\[0-9]*\\(D\\)" "optimized" } } */
+
+typedef __SIZE_TYPE__ size_t;
+typedef __INTPTR_TYPE__ T;
+
+T
+f1 (char *p, size_t a, size_t b)
+{
+  char *c = p + 1;
+  size_t d = a + 2;
+  size_t e = b + 3;
+  T f = (T) (p + a);
+  T g = (T) (p + b);
+  return f - g;
+}
+
+T
+f2 (char *p, size_t a, size_t b)
+{
+  size_t c = a + 1;
+  char *d = p + 2;
+  size_t e = b + 3;
+  T f = (T) (p + a);
+  T g = (T) (p + b);
+  return f - g;
+}
+
+T
+f3 (char *p, size_t a, size_t b)
+{
+  size_t c = b + 1;
+  char *d = p + 2;
+  size_t e = a + 3;
+  T f = (T) (p + a);
+  T g = (T) (p + b);
+  return f - g;
+}
+
+T
+f4 (char *p, size_t a)
+{
+  char *c = p + 1;
+  size_t d = a + 2;
+  T f = (T) (p + a);
+  T g = (T) p;
+  return f - g;
+}
+
+T
+f5 (char *p, size_t a)
+{
+  size_t c = a + 1;
+  char *d = p + 2;
+  T f = (T) (p + a);
+  T g = (T) p;
+  return f - g;
+}
+
+T
+f6 (char *p, size_t a)
+{
+  char *c = p + 1;
+  size_t d = a + 2;
+  T f = (T) p;
+  T g = (T) (p + a);
+  return f - g;
+}
+
+T
+f7 (char *p, size_t a)
+{
+  size_t c = a + 1;
+  char *d = p + 2;
+  T f = (T) p;
+  T g = (T) (p + a);
+  return f - g;
+}
--- gcc/testsuite/g++.dg/ubsan/pr81281.C.jj	2017-12-05 11:34:55.296632189 +0100
+++ gcc/testsuite/g++.dg/ubsan/pr81281.C	2017-12-05 11:34:55.296632189 +0100
@@ -0,0 +1,26 @@
+// PR sanitizer/81281
+// { dg-do run }
+// { dg-options "-fsanitize=undefined -fno-sanitize-recover=undefined" }
+// { dg-additional-sources "pr81281-aux.cc" }
+
+extern const int ci;
+extern int i;
+extern long long ll;
+
+int
+foo ()
+{
+  int a = (int) (-2024172551 - i - (ci - ll))
+	  - ((int) (-2024172551 - i - (ci - ll))
+	     - (int) (-2024172551 - (long long)ci));
+  return a;
+}
+
+int
+main ()
+{
+  if (__SIZEOF_INT__ * __CHAR_BIT__ == 32
+      && __SIZEOF_LONG_LONG__ * __CHAR_BIT__ == 64)
+    foo ();
+  return 0;
+}
--- gcc/testsuite/g++.dg/ubsan/pr81281-aux.cc.jj	2017-12-05 11:34:55.296632189 +0100
+++ gcc/testsuite/g++.dg/ubsan/pr81281-aux.cc	2017-12-05 11:34:55.296632189 +0100
@@ -0,0 +1,3 @@
+extern const int ci = 1716607962;
+int i = -943738830;
+long long ll = -43165919987465092LL;

	Jakub



More information about the Gcc-patches mailing list