[PATCH, i386]: Implement zero-extended cmove patterns
Uros Bizjak
ubizjak@gmail.com
Thu Jun 18 20:10:00 GMT 2015
The attached patch implements zero-extended cmove patterns. The patch
also merges a couple of similar peephole2 patterns into one.
2015-06-18 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (*movsicc_noc_zext): New insn.
(zero-extended cmove with mem peephole2): New pattern.
(cmove with mem peephole2): Merge patterns.
testsuite/ChangeLog:
2015-06-18 Uros Bizjak <ubizjak@gmail.com>
* gcc.target/i386/cmov9.c: New test.
Tested on x86_64-linux-gnu {,-m32} and committed to mainline SVN.
Uros.
-------------- next part --------------
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 224625)
+++ config/i386/i386.md (working copy)
@@ -16746,6 +16746,22 @@
[(set_attr "type" "icmov")
(set_attr "mode" "<MODE>")])
+(define_insn "*movsicc_noc_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (zero_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "rm,0"))
+ (zero_extend:DI
+ (match_operand:SI 3 "nonimmediate_operand" "0,rm"))))]
+ "TARGET_64BIT
+ && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "@
+ cmov%O2%C1\t{%2, %k0|%k0, %2}
+ cmov%O2%c1\t{%3, %k0|%k0, %3}"
+ [(set_attr "type" "icmov")
+ (set_attr "mode" "SI")])
+
;; Don't do conditional moves with memory inputs. This splitter helps
;; register starved x86_32 by forcing inputs into registers before reload.
(define_split
@@ -16797,30 +16813,65 @@
;; Don't do conditional moves with memory inputs
(define_peephole2
- [(match_scratch:SWI248 2 "r")
+ [(match_scratch:SWI248 4 "r")
(set (match_operand:SWI248 0 "register_operand")
(if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
[(reg FLAGS_REG) (const_int 0)])
- (match_dup 0)
- (match_operand:SWI248 3 "memory_operand")))]
+ (match_operand:SWI248 2 "nonimmediate_operand")
+ (match_operand:SWI248 3 "nonimmediate_operand")))]
"TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+ && (MEM_P (operands[2]) || MEM_P (operands[3]))
&& optimize_insn_for_speed_p ()"
- [(set (match_dup 2) (match_dup 3))
+ [(set (match_dup 4) (match_dup 5))
(set (match_dup 0)
- (if_then_else:SWI248 (match_dup 1) (match_dup 0) (match_dup 2)))])
+ (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
+{
+ if (MEM_P (operands[2]))
+ {
+ operands[5] = operands[2];
+ operands[2] = operands[4];
+ }
+ else if (MEM_P (operands[3]))
+ {
+ operands[5] = operands[3];
+ operands[3] = operands[4];
+ }
+ else
+ gcc_unreachable ();
+})
(define_peephole2
- [(match_scratch:SWI248 2 "r")
- (set (match_operand:SWI248 0 "register_operand")
- (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
- [(reg FLAGS_REG) (const_int 0)])
- (match_operand:SWI248 3 "memory_operand")
- (match_dup 0)))]
- "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+ [(match_scratch:SI 4 "r")
+ (set (match_operand:DI 0 "register_operand")
+ (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (zero_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand"))
+ (zero_extend:DI
+ (match_operand:SI 3 "nonimmediate_operand"))))]
+ "TARGET_64BIT
+ && TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+ && (MEM_P (operands[2]) || MEM_P (operands[3]))
&& optimize_insn_for_speed_p ()"
- [(set (match_dup 2) (match_dup 3))
+ [(set (match_dup 4) (match_dup 5))
(set (match_dup 0)
- (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 0)))])
+ (if_then_else:DI (match_dup 1)
+ (zero_extend:DI (match_dup 2))
+ (zero_extend:DI (match_dup 3))))]
+{
+ if (MEM_P (operands[2]))
+ {
+ operands[5] = operands[2];
+ operands[2] = operands[4];
+ }
+ else if (MEM_P (operands[3]))
+ {
+ operands[5] = operands[3];
+ operands[3] = operands[4];
+ }
+ else
+ gcc_unreachable ();
+})
(define_expand "mov<mode>cc"
[(set (match_operand:X87MODEF 0 "register_operand")
@@ -16922,35 +16973,35 @@
;; Don't do conditional moves with memory inputs
(define_peephole2
- [(match_scratch:MODEF 2 "r")
+ [(match_scratch:MODEF 4 "r")
(set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand")
(if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
[(reg FLAGS_REG) (const_int 0)])
- (match_dup 0)
- (match_operand:MODEF 3 "memory_operand")))]
+ (match_operand:MODEF 2 "nonimmediate_operand")
+ (match_operand:MODEF 3 "nonimmediate_operand")))]
"(<MODE>mode != DFmode || TARGET_64BIT)
&& TARGET_80387 && TARGET_CMOVE
&& TARGET_AVOID_MEM_OPND_FOR_CMOVE
+ && (MEM_P (operands[2]) || MEM_P (operands[3]))
&& optimize_insn_for_speed_p ()"
- [(set (match_dup 2) (match_dup 3))
+ [(set (match_dup 4) (match_dup 5))
(set (match_dup 0)
- (if_then_else:MODEF (match_dup 1) (match_dup 0) (match_dup 2)))])
+ (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
+{
+ if (MEM_P (operands[2]))
+ {
+ operands[5] = operands[2];
+ operands[2] = operands[4];
+ }
+ else if (MEM_P (operands[3]))
+ {
+ operands[5] = operands[3];
+ operands[3] = operands[4];
+ }
+ else
+ gcc_unreachable ();
+})
-(define_peephole2
- [(match_scratch:MODEF 2 "r")
- (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand")
- (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
- [(reg FLAGS_REG) (const_int 0)])
- (match_operand:MODEF 3 "memory_operand")
- (match_dup 0)))]
- "(<MODE>mode != DFmode || TARGET_64BIT)
- && TARGET_80387 && TARGET_CMOVE
- && TARGET_AVOID_MEM_OPND_FOR_CMOVE
- && optimize_insn_for_speed_p ()"
- [(set (match_dup 2) (match_dup 3))
- (set (match_dup 0)
- (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 0)))])
-
;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
;; the scalar versions to have only XMM registers as operands.
Index: testsuite/gcc.target/i386/cmov9.c
===================================================================
--- testsuite/gcc.target/i386/cmov9.c (revision 0)
+++ testsuite/gcc.target/i386/cmov9.c (working copy)
@@ -0,0 +1,8 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -dp" } */
+/* { dg-final { scan-assembler-not "zero_extendsidi" } } */
+
+unsigned long long foo (int a, unsigned int b, unsigned int c)
+{
+ return a ? b : c;
+}
More information about the Gcc-patches
mailing list