[PATCH] Optimize andl $0xffffffff, %eax

Richard Guenther richard.guenther@gmail.com
Thu May 21 15:44:00 GMT 2009


On Thu, May 21, 2009 at 3:16 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> On
> typedef unsigned long ul;
> typedef unsigned int ui;
> typedef unsigned short us;
> typedef unsigned char uc;
> ul f1 (ul x) { return x & 0xffffffff; } ul f2 (ul x) { return (ui) x; }
> ul f3 (ul x) { return x & 0xffff; } ul f4 (ul x) { return (us) x; }
> ul f5 (ul x) { return x & 0xff; } ul f6 (ul x) { return (uc) x; }
> ui f7 (ui x) { return x & 0xffff; } ui f8 (ui x) { return (us) x; }
> ui f9 (ui x) { return x & 0xff; } ui f10 (ui x) { return (uc) x; }
> (especially with -Os) I'd expect to see identical code generated
> for each pair of functions, but especially f1 is much longer than f2.
> -f1:
> -       movq    %rdi, %rax
> -       andl    $4294967295, %eax
> +f2:
> +       mov     %edi, %eax
> -f3:
> -       movzwq  %di,%rax
> +f4:
> +       movzwl  %di, %eax
> -f5:
> -       movzbq  %dil,%rax
> +f6:
> +       movzbl  %dil, %eax
> For f3/f5 we can generate one byte shorter code by avoiding useless rex
> prefix, for f1 zero_extendsidi2 is much better.  But even for the 0xffff and
> 0xff cases I think it is useful to use zero_extend?idi2 during expansion,
> because that doesn't report CC reg clobber.
>
> Bootstrapped/regtested on x86_64-linux, ok for trunk?

Hm, isn't that something simplify-rtx should do?  Transform
(and:SI reg 0xffff) to (zero_extend:SI (subreg ...))?

Richard.

> 2009-05-21  Jakub Jelinek  <jakub@redhat.com>
>
>        * config/i386/i386.md (anddi3): If dst and src1 are registers
>        and src2 is 0xffffffff, 0xffff or 0xff, expand as zero_extend?idi2.
>        (anddi_1_rex64): Use movzbl/movzwl instead of movzbq/movzwq.
>
> --- gcc/config/i386/i386.md.jj  2009-05-20 21:06:38.000000000 +0200
> +++ gcc/config/i386/i386.md     2009-05-21 11:14:48.000000000 +0200
> @@ -9508,7 +9508,32 @@
>        (and:DI (match_operand:DI 1 "nonimmediate_operand" "")
>                (match_operand:DI 2 "x86_64_szext_general_operand" "")))]
>   "TARGET_64BIT"
> -  "ix86_expand_binary_operator (AND, DImode, operands); DONE;")
> +{
> +  if (REG_P (operands[0]) && REG_P (operands[1]) && CONST_INT_P (operands[2]))
> +    {
> +      if (INTVAL (operands[2]) == ((unsigned HOST_WIDE_INT) 2 << 31) - 1)
> +       {
> +         operands[1] = gen_lowpart_SUBREG (SImode, operands[1]);
> +         emit_insn (gen_zero_extendsidi2 (operands[0], operands[1]));
> +         DONE;
> +       }
> +      if (INTVAL (operands[2]) == 0xffff)
> +       {
> +         operands[1] = gen_lowpart_SUBREG (HImode, operands[1]);
> +         emit_insn (gen_zero_extendhidi2 (operands[0], operands[1]));
> +         DONE;
> +       }
> +      if (INTVAL (operands[2]) == 0xff)
> +       {
> +         operands[1] = gen_lowpart_SUBREG (QImode, operands[1]);
> +         emit_insn (gen_zero_extendqidi2 (operands[0], operands[1]));
> +         DONE;
> +       }
> +    }
> +
> +  ix86_expand_binary_operator (AND, DImode, operands);
> +  DONE;
> +})
>
>  (define_insn "*anddi_1_rex64"
>   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
> @@ -9534,9 +9559,9 @@
>
>        operands[1] = gen_lowpart (mode, operands[1]);
>        if (mode == QImode)
> -         return "movz{bq|x}\t{%1,%0|%0, %1}";
> +         return "movz{bl|x}\t{%1,%k0|%k0, %1}";
>        else
> -         return "movz{wq|x}\t{%1,%0|%0, %1}";
> +         return "movz{wl|x}\t{%1,%k0|%k0, %1}";
>       }
>
>     default:
> @@ -9549,7 +9574,7 @@
>  }
>   [(set_attr "type" "alu,alu,alu,imovx")
>    (set_attr "length_immediate" "*,*,*,0")
> -   (set_attr "mode" "SI,DI,DI,DI")])
> +   (set_attr "mode" "SI,DI,DI,SI")])
>
>  (define_insn "*anddi_2"
>   [(set (reg FLAGS_REG)
>
>        Jakub
>



More information about the Gcc-patches mailing list