This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
PR target/30315 i386 missed optimization detecting overflows
- From: Rask Ingemann Lambertsen <rask at sygehus dot dk>
- To: gcc-patches at gcc dot gnu dot org
- Date: Fri, 3 Aug 2007 21:34:57 +0200
- Subject: PR target/30315 i386 missed optimization detecting overflows
PR target/30315 is an enhancement request for optimizing overflow checks.
unsigned long
fool1 (unsigned long a, unsigned long b)
{
unsigned long sum = a + b;
if (sum < a)
abort ();
return sum;
}
Currently, we generate (-O -m32):
fool1:
pushl %ebp # 31 *pushsi2 [length = 1]
movl %esp, %ebp # 32 *movsi_1/1 [length = 2]
subl $8, %esp # 33 pro_epilogue_adjust_stack_1/1 [length = 3]
movl 8(%ebp), %eax # 2 *movsi_1/1 [length = 3]
movl 12(%ebp), %edx # 30 *movsi_1/1 [length = 3]
addl %eax, %edx # 7 *addsi_1/1 [length = 2]
cmpl %edx, %eax # 8 *cmpsi_1_insn/1 [length = 2]
jbe .L11 # 9 *jcc_1 [length = 2]
call abort # 11 *call_0 [length = 5]
.L11:
movl %edx, %eax # 19 *movsi_1/1 [length = 2]
leave # 36 leave [length = 1]
ret # 37 return_internal [length = 1]
With the patch (-O -m32):
fool1:
pushl %ebp # 30 *pushsi2 [length = 1]
movl %esp, %ebp # 31 *movsi_1/1 [length = 2]
subl $8, %esp # 32 pro_epilogue_adjust_stack_1/1 [length = 3]
movl 8(%ebp), %eax # 2 *movsi_1/1 [length = 3]
addl 12(%ebp), %eax # 8 *addsi3_cc_overflow2/2 [length = 3]
jae .L23 # 9 *jcc_1 [length = 2]
call abort # 11 *call_0 [length = 5]
.L23:
leave # 35 leave [length = 1]
ret # 36 return_internal [length = 1]
On x86_64, without the patch (-O -m64):
fool1:
.LFB2:
subq $8, %rsp # 31 pro_epilogue_adjust_stack_rex64/1 [length = 4]
.LCFI3:
leaq (%rsi,%rdi), %rax # 30 *lea_2_rex64 [length = 4]
cmpq %rax, %rdi # 8 cmpdi_1_insn_rex64/1 [length = 3]
jbe .L11 # 9 *jcc_1 [length = 2]
call abort # 11 *call_0 [length = 5]
.L11:
addq $8, %rsp # 34 pro_epilogue_adjust_stack_rex64/1 [length = 4]
ret # 35 return_internal [length = 1]
With the patch (-O -m64):
fool1:
.LFB2:
subq $8, %rsp # 31 pro_epilogue_adjust_stack_rex64/1 [length = 4]
.LCFI7:
movq %rsi, %rax # 30 *movdi_1_rex64/2 [length = 6]
addq %rdi, %rax # 8 *adddi3_cc_overflow2_rex64/1 [length = 3]
jae .L23 # 9 *jcc_1 [length = 2]
call abort # 11 *call_0 [length = 5]
.L23:
addq $8, %rsp # 34 pro_epilogue_adjust_stack_rex64/1 [length = 4]
ret # 35 return_internal [length = 1]
Likewise we also optimize overflow checks when subtracting numbers:
unsigned long
barl1 (unsigned long a, unsigned long b)
{
unsigned long difference = a - b;
if (difference > a)
abort ();
return difference;
}
barl1:
pushl %ebp # 30 *pushsi2 [length = 1]
movl %esp, %ebp # 31 *movsi_1/1 [length = 2]
subl $8, %esp # 32 pro_epilogue_adjust_stack_1/1 [length = 3]
movl 8(%ebp), %eax # 2 *movsi_1/1 [length = 3]
subl 12(%ebp), %eax # 8 *subsi3_cc_overflow1/2 [length = 3]
jae .L11 # 9 *jcc_1 [length = 2]
call abort # 11 *call_0 [length = 5]
.L11:
leave # 35 leave [length = 1]
ret # 36 return_internal [length = 1]
barl1:
.LFB6:
subq $8, %rsp # 31 pro_epilogue_adjust_stack_rex64/1 [length = 4]
.LCFI3:
movq %rdi, %rax # 30 *movdi_1_rex64/2 [length = 6]
subq %rsi, %rax # 8 *subdi3_cc_overflow1_rex64/1 [length = 3]
jae .L11 # 9 *jcc_1 [length = 2]
call abort # 11 *call_0 [length = 5]
.L11:
addq $8, %rsp # 34 pro_epilogue_adjust_stack_rex64/1 [length = 4]
ret # 35 return_internal [length = 1]
This patch does not completely fix the original testcase which has:
if (sum < a) abort(); /* check for overflow (wrapping) */
if (sum < b) abort(); /* redundant */
Some help from the middle end or the tree optimizers is needed to get rid of
the second test, but the patch does optimize away the first such test.
Ok for trunk if it passes bootstrap and testing?
:ADDPATCH target i386:
2007-08-03 Rask Ingemann Lambertsen <rask@sygehus.dk>
PR target/30315
* config/i386/i386.md (*adddi3_cc_overflow1_rex64): New.
(*adddi3_cc_overflow2_rex64): New.
(*addsi3_cc_overflow1): New.
(*addsi3_cc_overflow2): New.
(*subdi3_cc_overflow1_rex64): New.
(*subdi3_cc_overflow2_rex64): New.
(*subsi3_cc_overflow1): New.
(*subsi3_cc_overflow2): New.
* config/i386/predicates.md (fcmov_comparison_operator): Accept
CCCmode for LTU, GTU, LEU and GEU.
(ix86_comparison_operator): Likewise.
(ix86_carry_flag_operator): Carry set if LTU or GTU in CCCmode.
* gcc/config/i386/i386.c (put_condition_code): Support CCCmode.
(ix86_cc_mode): Use CCCmode when testing for overflow of PLUS
or MINUS expressions.
testsuite/
2007-08-03 Rask Ingemann Lambertsen <rask@sygehus.dk>
PR target/30315
* gcc/testsuite/gcc.target/i386/pr30315.c: New.
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md (revision 127179)
+++ gcc/config/i386/i386.md (working copy)
@@ -4855,6 +4855,32 @@
[(set_attr "type" "alu")
(set_attr "mode" "DI")])
+(define_insn "*adddi3_cc_overflow1_rex64"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm"))
+ (match_dup 1)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+ "add{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*adddi3_cc_overflow2_rex64"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm"))
+ (match_dup 2)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+ "add{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
(define_insn "addqi3_carry"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
(plus:QI (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "")
@@ -4916,6 +4942,30 @@
[(set_attr "type" "alu")
(set_attr "mode" "SI")])
+(define_insn "*addsi3_cc_overflow1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm"))
+ (match_dup 1)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (PLUS, SImode, operands)"
+ "add{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*addsi3_cc_overflow2"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm"))
+ (match_dup 2)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (PLUS, SImode, operands)"
+ "add{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
(define_insn "addqi3_cc"
[(set (reg:CC FLAGS_REG)
(unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0")
@@ -6608,6 +6658,32 @@
[(set_attr "type" "alu")
(set_attr "mode" "DI")])
+(define_insn "*subdi3_cc_overflow1_rex64"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm"))
+ (match_dup 1)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (minus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
+ "sub{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*subdi3_cc_overflow2_rex64"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm"))
+ (match_dup 2)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (minus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
+ "sub{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
(define_insn "subqi3_carry"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
(minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
@@ -6700,6 +6776,32 @@
[(set_attr "type" "alu")
(set_attr "mode" "SI")])
+(define_insn "*subsi3_cc_overflow1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm"))
+ (match_dup 1)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (minus:SI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subsi3_cc_overflow2"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm"))
+ (match_dup 2)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (minus:SI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
(define_insn "*subsi_2_zext"
[(set (reg FLAGS_REG)
(compare
Index: gcc/config/i386/predicates.md
===================================================================
--- gcc/config/i386/predicates.md (revision 127179)
+++ gcc/config/i386/predicates.md (working copy)
@@ -879,7 +879,8 @@
switch (code)
{
case LTU: case GTU: case LEU: case GEU:
- if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
+ if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode
+ || inmode == CCCmode)
return 1;
return 0;
case ORDERED: case UNORDERED:
@@ -924,7 +925,11 @@
|| inmode == CCGOCmode || inmode == CCNOmode)
return 1;
return 0;
- case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
+ case LTU: case GTU: case LEU: case GEU:
+ if (inmode == CCmode || inmode == CCCmode)
+ return 1;
+ return 0;
+ case ORDERED: case UNORDERED:
if (inmode == CCmode)
return 1;
return 0;
@@ -957,6 +962,8 @@
return 0;
code = ix86_fp_compare_code_to_integer (code);
}
+ else if (inmode == CCCmode)
+ return code == LTU || code == GTU;
else if (inmode != CCmode)
return 0;
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c (revision 127179)
+++ gcc/config/i386/i386.c (working copy)
@@ -8254,8 +8254,12 @@ put_condition_code (enum rtx_code code,
case GTU:
/* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
Those same assemblers have the same but opposite lossage on cmov. */
- gcc_assert (mode == CCmode);
- suffix = fp ? "nbe" : "a";
+ if (mode == CCmode)
+ suffix = fp ? "nbe" : "a";
+ else if (mode == CCCmode)
+ suffix = "b";
+ else
+ gcc_unreachable ();
break;
case LT:
switch (mode)
@@ -8275,7 +8279,7 @@ put_condition_code (enum rtx_code code,
}
break;
case LTU:
- gcc_assert (mode == CCmode);
+ gcc_assert (mode == CCmode || mode == CCCmode);
suffix = "b";
break;
case GE:
@@ -8297,7 +8301,7 @@ put_condition_code (enum rtx_code code,
break;
case GEU:
/* ??? As above. */
- gcc_assert (mode == CCmode);
+ gcc_assert (mode == CCmode || mode == CCCmode);
suffix = fp ? "nb" : "ae";
break;
case LE:
@@ -8305,8 +8309,12 @@ put_condition_code (enum rtx_code code,
suffix = "le";
break;
case LEU:
- gcc_assert (mode == CCmode);
- suffix = "be";
+ if (mode == CCmode)
+ suffix = "be";
+ else if (mode == CCCmode)
+ suffix = fp ? "nb" : "ae";
+ else
+ gcc_unreachable ();
break;
case UNORDERED:
suffix = fp ? "u" : "p";
@@ -11033,10 +11041,24 @@ ix86_cc_mode (enum rtx_code code, rtx op
return CCZmode;
/* Codes needing carry flag. */
case GEU: /* CF=0 */
+ /* Detect overflow checks. They need just the carry flag. */
+ if (GET_CODE (op0) == PLUS
+ && (rtx_equal_p (op1, XEXP (op0, 0))
+ || rtx_equal_p (op1, XEXP (op0, 1))))
+ return CCCmode;
+ else
+ return CCmode;
case GTU: /* CF=0 & ZF=0 */
case LTU: /* CF=1 */
- case LEU: /* CF=1 | ZF=1 */
return CCmode;
+ case LEU: /* CF=1 | ZF=1 */
+ /* Detect overflow checks. They need just the carry flag. */
+ if (GET_CODE (op0) == MINUS
+ && (rtx_equal_p (op1, XEXP (op0, 0))
+ || rtx_equal_p (op1, XEXP (op0, 1))))
+ return CCCmode;
+ else
+ return CCmode;
/* Codes possibly doable only with sign flag when
comparing against zero. */
case GE: /* SF=OF or SF=0 */
Index: gcc/testsuite/gcc.target/i386/pr30315.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr30315.c (revision 0)
+++ gcc/testsuite/gcc.target/i386/pr30315.c (revision 0)
@@ -0,0 +1,77 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { scan-assembler-not "cmp" } } */
+
+extern void abort (void);
+
+unsigned long
+fool1 (unsigned long a, unsigned long b)
+{
+ unsigned long sum = a + b;
+ if (sum < a)
+ abort ();
+ return sum;
+}
+
+unsigned long
+fool2 (unsigned long a, unsigned long b)
+{
+ unsigned long sum = a + b;
+ if (sum < b)
+ abort ();
+ return sum;
+}
+
+unsigned int
+fooi1 (unsigned int a, unsigned int b)
+{
+ unsigned int sum = a + b;
+ if (sum < a)
+ abort ();
+ return sum;
+}
+
+unsigned int
+fooi2 (unsigned int a, unsigned int b)
+{
+ unsigned int sum = a + b;
+ if (sum < b)
+ abort ();
+ return sum;
+}
+
+unsigned long
+barl1 (unsigned long a, unsigned long b)
+{
+ unsigned long difference = a - b;
+ if (difference > a)
+ abort ();
+ return difference;
+}
+
+unsigned long
+barl2 (unsigned long a, unsigned long b)
+{
+ unsigned long difference = a - b;
+ if (difference > b)
+ abort ();
+ return difference;
+}
+
+unsigned int
+bari1 (unsigned int a, unsigned int b)
+{
+ unsigned int difference = a - b;
+ if (difference > a)
+ abort ();
+ return difference;
+}
+
+unsigned int
+bari2 (unsigned int a, unsigned int b)
+{
+ unsigned int difference = a - b;
+ if (difference > b)
+ abort ();
+ return difference;
+}