PR target/30315 i386 missed optimization detecting overflows

Rask Ingemann Lambertsen rask@sygehus.dk
Fri Aug 3 19:35:00 GMT 2007


   PR target/30315 is an enhancement request for optimizing overflow checks.

unsigned long
fool1 (unsigned long a, unsigned long b)
{
  unsigned long sum = a + b;
  if (sum < a)
    abort ();
  return sum;
}

   Currently, we generate (-O -m32):

fool1:
	pushl	%ebp		# 31	*pushsi2	[length = 1]
	movl	%esp, %ebp	# 32	*movsi_1/1	[length = 2]
	subl	$8, %esp	# 33	pro_epilogue_adjust_stack_1/1	[length = 3]
	movl	8(%ebp), %eax	# 2	*movsi_1/1	[length = 3]
	movl	12(%ebp), %edx	# 30	*movsi_1/1	[length = 3]
	addl	%eax, %edx	# 7	*addsi_1/1	[length = 2]
	cmpl	%edx, %eax	# 8	*cmpsi_1_insn/1	[length = 2]
	jbe	.L11		# 9	*jcc_1	[length = 2]
	call	abort		# 11	*call_0	[length = 5]
.L11:
	movl	%edx, %eax	# 19	*movsi_1/1	[length = 2]
	leave			# 36	leave	[length = 1]
	ret			# 37	return_internal	[length = 1]

   With the patch (-O -m32):

fool1:
	pushl	%ebp		# 30	*pushsi2	[length = 1]
	movl	%esp, %ebp	# 31	*movsi_1/1	[length = 2]
	subl	$8, %esp	# 32	pro_epilogue_adjust_stack_1/1	[length = 3]
	movl	8(%ebp), %eax	# 2	*movsi_1/1	[length = 3]
	addl	12(%ebp), %eax	# 8	*addsi3_cc_overflow2/2	[length = 3]
	jae	.L23		# 9	*jcc_1	[length = 2]
	call	abort		# 11	*call_0	[length = 5]
.L23:
	leave			# 35	leave	[length = 1]
	ret			# 36	return_internal	[length = 1]

   On x86_64, without the patch (-O -m64):

fool1:
.LFB2:
	subq	$8, %rsp	# 31	pro_epilogue_adjust_stack_rex64/1	[length = 4]
.LCFI3:
	leaq	(%rsi,%rdi), %rax	# 30	*lea_2_rex64	[length = 4]
	cmpq	%rax, %rdi	# 8	cmpdi_1_insn_rex64/1	[length = 3]
	jbe	.L11		# 9	*jcc_1	[length = 2]
	call	abort		# 11	*call_0	[length = 5]
.L11:
	addq	$8, %rsp	# 34	pro_epilogue_adjust_stack_rex64/1	[length = 4]
	ret			# 35	return_internal	[length = 1]

   With the patch (-O -m64):

fool1:
.LFB2:
	subq	$8, %rsp	# 31	pro_epilogue_adjust_stack_rex64/1	[length = 4]
.LCFI7:
	movq	%rsi, %rax	# 30	*movdi_1_rex64/2	[length = 6]
	addq	%rdi, %rax	# 8	*adddi3_cc_overflow2_rex64/1	[length = 3]
	jae	.L23		# 9	*jcc_1	[length = 2]
	call	abort		# 11	*call_0	[length = 5]
.L23:
	addq	$8, %rsp	# 34	pro_epilogue_adjust_stack_rex64/1	[length = 4]
	ret			# 35	return_internal	[length = 1]

   Likewise we also optimize overflow checks when subtracting numbers:

unsigned long
barl1 (unsigned long a, unsigned long b)
{
  unsigned long difference = a - b;
  if (difference > a)
    abort ();
  return difference;
}

barl1:
	pushl	%ebp		# 30	*pushsi2	[length = 1]
	movl	%esp, %ebp	# 31	*movsi_1/1	[length = 2]
	subl	$8, %esp	# 32	pro_epilogue_adjust_stack_1/1	[length = 3]
	movl	8(%ebp), %eax	# 2	*movsi_1/1	[length = 3]
	subl	12(%ebp), %eax	# 8	*subsi3_cc_overflow1/2	[length = 3]
	jae	.L11		# 9	*jcc_1	[length = 2]
	call	abort		# 11	*call_0	[length = 5]
.L11:
	leave			# 35	leave	[length = 1]
	ret			# 36	return_internal	[length = 1]

barl1:
.LFB6:
	subq	$8, %rsp	# 31	pro_epilogue_adjust_stack_rex64/1	[length = 4]
.LCFI3:
	movq	%rdi, %rax	# 30	*movdi_1_rex64/2	[length = 6]
	subq	%rsi, %rax	# 8	*subdi3_cc_overflow1_rex64/1	[length = 3]
	jae	.L11		# 9	*jcc_1	[length = 2]
	call	abort		# 11	*call_0	[length = 5]
.L11:
	addq	$8, %rsp	# 34	pro_epilogue_adjust_stack_rex64/1	[length = 4]
	ret			# 35	return_internal	[length = 1]

   This patch does not completely fix the original testcase which has:

     if (sum < a) abort(); /* check for overflow (wrapping) */
     if (sum < b) abort(); /* redundant */

Some help from the middle end or the tree optimizers is needed to get rid of
the second test, but the patch does optimize away the first such test.
Ok for trunk if it passes bootstrap and testing?

:ADDPATCH target i386:

2007-08-03  Rask Ingemann Lambertsen  <rask@sygehus.dk>

	PR target/30315
	* config/i386/i386.md (*adddi3_cc_overflow1_rex64): New.
	  (*adddi3_cc_overflow2_rex64): New.
	  (*addsi3_cc_overflow1): New.
	  (*addsi3_cc_overflow2): New.
	  (*subdi3_cc_overflow1_rex64): New.
	  (*subdi3_cc_overflow2_rex64): New.
	  (*subsi3_cc_overflow1): New.
	  (*subsi3_cc_overflow2): New.
	* config/i386/predicates.md (fcmov_comparison_operator): Accept
	  CCCmode for LTU, GTU, LEU and GEU.
	  (ix86_comparison_operator): Likewise.
	  (ix86_carry_flag_operator): Carry set if LTU or GTU in CCCmode.
	* gcc/config/i386/i386.c (put_condition_code): Support CCCmode.
	  (ix86_cc_mode): Use CCCmode when testing for overflow of PLUS
	  or MINUS expressions.

testsuite/

2007-08-03  Rask Ingemann Lambertsen  <rask@sygehus.dk>

	PR target/30315
	* gcc/testsuite/gcc.target/i386/pr30315.c: New.

Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md	(revision 127179)
+++ gcc/config/i386/i386.md	(working copy)
@@ -4855,6 +4855,32 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "DI")])
 
+(define_insn "*adddi3_cc_overflow1_rex64"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	    (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+		     (match_operand:DI 2 "x86_64_general_operand" "re,rm"))
+	    (match_dup 1)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+	(plus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+  "add{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
+(define_insn "*adddi3_cc_overflow2_rex64"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	    (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+		     (match_operand:DI 2 "x86_64_general_operand" "re,rm"))
+	    (match_dup 2)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+	(plus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+  "add{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
 (define_insn "addqi3_carry"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
 	  (plus:QI (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "")
@@ -4916,6 +4942,30 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
+(define_insn "*addsi3_cc_overflow1"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+			      (match_operand:SI 2 "general_operand" "ri,rm"))
+		     (match_dup 1)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+	(plus:SI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (PLUS, SImode, operands)"
+  "add{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*addsi3_cc_overflow2"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+			      (match_operand:SI 2 "general_operand" "ri,rm"))
+		     (match_dup 2)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+	(plus:SI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (PLUS, SImode, operands)"
+  "add{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
 (define_insn "addqi3_cc"
   [(set (reg:CC FLAGS_REG)
 	(unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0")
@@ -6608,6 +6658,32 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "DI")])
 
+(define_insn "*subdi3_cc_overflow1_rex64"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+		    (match_operand:DI 2 "x86_64_general_operand" "re,rm"))
+	  (match_dup 1)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+	(minus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
+  "sub{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
+(define_insn "*subdi3_cc_overflow2_rex64"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+		    (match_operand:DI 2 "x86_64_general_operand" "re,rm"))
+	  (match_dup 2)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+	(minus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
+  "sub{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "DI")])
+
 (define_insn "subqi3_carry"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
 	  (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
@@ -6700,6 +6776,32 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
+(define_insn "*subsi3_cc_overflow1"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+		    (match_operand:SI 2 "general_operand" "ri,rm"))
+	  (match_dup 1)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+	(minus:SI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (MINUS, SImode, operands)"
+  "sub{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*subsi3_cc_overflow2"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC
+	  (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+		    (match_operand:SI 2 "general_operand" "ri,rm"))
+	  (match_dup 2)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+	(minus:SI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (MINUS, SImode, operands)"
+  "sub{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
 (define_insn "*subsi_2_zext"
   [(set (reg FLAGS_REG)
 	(compare
Index: gcc/config/i386/predicates.md
===================================================================
--- gcc/config/i386/predicates.md	(revision 127179)
+++ gcc/config/i386/predicates.md	(working copy)
@@ -879,7 +879,8 @@
   switch (code)
     {
     case LTU: case GTU: case LEU: case GEU:
-      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
+      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode
+	  || inmode == CCCmode)
 	return 1;
       return 0;
     case ORDERED: case UNORDERED:
@@ -924,7 +925,11 @@
 	  || inmode == CCGOCmode || inmode == CCNOmode)
 	return 1;
       return 0;
-    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
+    case LTU: case GTU: case LEU: case GEU:
+      if (inmode == CCmode || inmode == CCCmode)
+	return 1;
+      return 0;
+    case ORDERED: case UNORDERED:
       if (inmode == CCmode)
 	return 1;
       return 0;
@@ -957,6 +962,8 @@
 	return 0;
       code = ix86_fp_compare_code_to_integer (code);
     }
+  else if (inmode == CCCmode)
+   return code == LTU || code == GTU;
   else if (inmode != CCmode)
     return 0;
 
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 127179)
+++ gcc/config/i386/i386.c	(working copy)
@@ -8254,8 +8254,12 @@ put_condition_code (enum rtx_code code, 
     case GTU:
       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
 	 Those same assemblers have the same but opposite lossage on cmov.  */
-      gcc_assert (mode == CCmode);
-      suffix = fp ? "nbe" : "a";
+      if (mode == CCmode)
+	suffix = fp ? "nbe" : "a";
+      else if (mode == CCCmode)
+	suffix = "b";
+      else
+	gcc_unreachable ();
       break;
     case LT:
       switch (mode)
@@ -8275,7 +8279,7 @@ put_condition_code (enum rtx_code code, 
 	}
       break;
     case LTU:
-      gcc_assert (mode == CCmode);
+      gcc_assert (mode == CCmode || mode == CCCmode);
       suffix = "b";
       break;
     case GE:
@@ -8297,7 +8301,7 @@ put_condition_code (enum rtx_code code, 
       break;
     case GEU:
       /* ??? As above.  */
-      gcc_assert (mode == CCmode);
+      gcc_assert (mode == CCmode || mode == CCCmode);
       suffix = fp ? "nb" : "ae";
       break;
     case LE:
@@ -8305,8 +8309,12 @@ put_condition_code (enum rtx_code code, 
       suffix = "le";
       break;
     case LEU:
-      gcc_assert (mode == CCmode);
-      suffix = "be";
+      if (mode == CCmode)
+	suffix = "be";
+      else if (mode == CCCmode)
+	suffix = fp ? "nb" : "ae";
+      else
+	gcc_unreachable ();
       break;
     case UNORDERED:
       suffix = fp ? "u" : "p";
@@ -11033,10 +11041,24 @@ ix86_cc_mode (enum rtx_code code, rtx op
       return CCZmode;
       /* Codes needing carry flag.  */
     case GEU:			/* CF=0 */
+      /* Detect overflow checks.  They need just the carry flag.  */
+      if (GET_CODE (op0) == PLUS
+	  && (rtx_equal_p (op1, XEXP (op0, 0))
+	      || rtx_equal_p (op1, XEXP (op0, 1))))
+	return CCCmode;
+      else
+	return CCmode;
     case GTU:			/* CF=0 & ZF=0 */
     case LTU:			/* CF=1 */
-    case LEU:			/* CF=1 | ZF=1 */
       return CCmode;
+    case LEU:			/* CF=1 | ZF=1 */
+      /* Detect overflow checks.  They need just the carry flag.  */
+      if (GET_CODE (op0) == MINUS
+	  && (rtx_equal_p (op1, XEXP (op0, 0))
+	      || rtx_equal_p (op1, XEXP (op0, 1))))
+	return CCCmode;
+      else
+	return CCmode;
       /* Codes possibly doable only with sign flag when
          comparing against zero.  */
     case GE:			/* SF=OF   or   SF=0 */
Index: gcc/testsuite/gcc.target/i386/pr30315.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr30315.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr30315.c	(revision 0)
@@ -0,0 +1,77 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { scan-assembler-not "cmp" } } */
+
+extern void abort (void);
+
+unsigned long
+fool1 (unsigned long a, unsigned long b)
+{
+  unsigned long sum = a + b;
+  if (sum < a)
+    abort ();
+  return sum;
+}
+
+unsigned long
+fool2 (unsigned long a, unsigned long b)
+{
+  unsigned long sum = a + b;
+  if (sum < b)
+    abort ();
+  return sum;
+}
+
+unsigned int
+fooi1 (unsigned int a, unsigned int b)
+{
+  unsigned int sum = a + b;
+  if (sum < a)
+    abort ();
+  return sum;
+}
+
+unsigned int
+fooi2 (unsigned int a, unsigned int b)
+{
+  unsigned int sum = a + b;
+  if (sum < b)
+    abort ();
+  return sum;
+}
+
+unsigned long
+barl1 (unsigned long a, unsigned long b)
+{
+  unsigned long difference = a - b;
+  if (difference > a)
+    abort ();
+  return difference;
+}
+
+unsigned long
+barl2 (unsigned long a, unsigned long b)
+{
+  unsigned long difference = a - b;
+  if (difference > b)
+    abort ();
+  return difference;
+}
+
+unsigned int
+bari1 (unsigned int a, unsigned int b)
+{
+  unsigned int difference = a - b;
+  if (difference > a)
+    abort ();
+  return difference;
+}
+
+unsigned int
+bari2 (unsigned int a, unsigned int b)
+{
+  unsigned int difference = a - b;
+  if (difference > b)
+    abort ();
+  return difference;
+}



More information about the Gcc-patches mailing list