This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Optimize andl $0xffffffff, %eax
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Uros Bizjak <ubizjak at gmail dot com>, Jan Hubicka <jh at suse dot cz>, Richard Henderson <rth at redhat dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Thu, 21 May 2009 15:16:53 +0200
- Subject: [PATCH] Optimize andl $0xffffffff, %eax
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
On
/* Test case.  Each line below defines a pair of functions taking and
   returning the same type: the first masks its argument with a constant,
   the second casts the argument to a narrower unsigned typedef.  */
typedef unsigned long ul;
typedef unsigned int ui;
typedef unsigned short us;
typedef unsigned char uc;
/* ul argument: mask 0xffffffff vs. cast to ui.  */
ul f1 (ul x) { return x & 0xffffffff; } ul f2 (ul x) { return (ui) x; }
/* ul argument: mask 0xffff vs. cast to us.  */
ul f3 (ul x) { return x & 0xffff; } ul f4 (ul x) { return (us) x; }
/* ul argument: mask 0xff vs. cast to uc.  */
ul f5 (ul x) { return x & 0xff; } ul f6 (ul x) { return (uc) x; }
/* ui argument: mask 0xffff vs. cast to us.  */
ui f7 (ui x) { return x & 0xffff; } ui f8 (ui x) { return (us) x; }
/* ui argument: mask 0xff vs. cast to uc.  */
ui f9 (ui x) { return x & 0xff; } ui f10 (ui x) { return (uc) x; }
(especially with -Os) I'd expect to see identical code generated
for each pair of functions, but especially f1 is much longer than f2.
-f1:
- movq %rdi, %rax
- andl $4294967295, %eax
+f2:
+ mov %edi, %eax
-f3:
- movzwq %di,%rax
+f4:
+ movzwl %di, %eax
-f5:
- movzbq %dil,%rax
+f6:
+ movzbl %dil, %eax
For f3/f5 we can generate one byte shorter code by avoiding the useless REX
prefix; for f1, zero_extendsidi2 is much better. But even for the 0xffff and
0xff cases I think it is useful to use zero_extend?idi2 during expansion,
because that doesn't report a CC reg clobber.
Bootstrapped/regtested on x86_64-linux, ok for trunk?
2009-05-21 Jakub Jelinek <jakub@redhat.com>
* config/i386/i386.md (anddi3): If dst and src1 are registers
and src2 is 0xffffffff, 0xffff or 0xff, expand as zero_extend?idi2.
(anddi_1_rex64): Use movzbl/movzwl instead of movzbq/movzwq.
--- gcc/config/i386/i386.md.jj 2009-05-20 21:06:38.000000000 +0200
+++ gcc/config/i386/i386.md 2009-05-21 11:14:48.000000000 +0200
@@ -9508,7 +9508,32 @@
(and:DI (match_operand:DI 1 "nonimmediate_operand" "")
(match_operand:DI 2 "x86_64_szext_general_operand" "")))]
"TARGET_64BIT"
- "ix86_expand_binary_operator (AND, DImode, operands); DONE;")
+{
+ if (REG_P (operands[0]) && REG_P (operands[1]) && CONST_INT_P (operands[2]))
+ {
+ if (INTVAL (operands[2]) == ((unsigned HOST_WIDE_INT) 2 << 31) - 1)
+ {
+ operands[1] = gen_lowpart_SUBREG (SImode, operands[1]);
+ emit_insn (gen_zero_extendsidi2 (operands[0], operands[1]));
+ DONE;
+ }
+ if (INTVAL (operands[2]) == 0xffff)
+ {
+ operands[1] = gen_lowpart_SUBREG (HImode, operands[1]);
+ emit_insn (gen_zero_extendhidi2 (operands[0], operands[1]));
+ DONE;
+ }
+ if (INTVAL (operands[2]) == 0xff)
+ {
+ operands[1] = gen_lowpart_SUBREG (QImode, operands[1]);
+ emit_insn (gen_zero_extendqidi2 (operands[0], operands[1]));
+ DONE;
+ }
+ }
+
+ ix86_expand_binary_operator (AND, DImode, operands);
+ DONE;
+})
(define_insn "*anddi_1_rex64"
[(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
@@ -9534,9 +9559,9 @@
operands[1] = gen_lowpart (mode, operands[1]);
if (mode == QImode)
- return "movz{bq|x}\t{%1,%0|%0, %1}";
+ return "movz{bl|x}\t{%1,%k0|%k0, %1}";
else
- return "movz{wq|x}\t{%1,%0|%0, %1}";
+ return "movz{wl|x}\t{%1,%k0|%k0, %1}";
}
default:
@@ -9549,7 +9574,7 @@
}
[(set_attr "type" "alu,alu,alu,imovx")
(set_attr "length_immediate" "*,*,*,0")
- (set_attr "mode" "SI,DI,DI,DI")])
+ (set_attr "mode" "SI,DI,DI,SI")])
(define_insn "*anddi_2"
[(set (reg FLAGS_REG)
Jakub