This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Add {u,}mulvhi4 patterns on i?86 (PR target/66112)
- From: Ramana Radhakrishnan <ramana dot gcc at googlemail dot com>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: Uros Bizjak <ubizjak at gmail dot com>, gcc-patches <gcc-patches at gcc dot gnu dot org>
- Date: Tue, 12 May 2015 20:26:57 +0100
- Subject: Re: [PATCH] Add {u,}mulvhi4 patterns on i?86 (PR target/66112)
- Authentication-results: sourceware.org; auth=none
- References: <20150512184327 dot GB1751 at tucnak dot redhat dot com>
- Reply-to: ramrad01 at arm dot com
On Tue, May 12, 2015 at 7:43 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> This patch improves expansion of __builtin_mul_overflow for HImode, both
> signed and unsigned, on x86_64/i686.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2015-05-12 Jakub Jelinek <jakub@redhat.com>
>
> PR target/66112
> * config/i386/i386.md (mulv<mode>4, umulv<mode>4, *umulv<mode>4):
> Use SWI248 iterator instead of SWI.
These names, along with the other *v<mode>4 patterns, need documentation
in md.texi.
Ramana
> (*mulv<mode>4_1): Use SWI48 instead of SWI. Simplify output template.
> Use eq_attr "alternative" "0" instead of match_test in
> length_immediate attribute computation.
> (*mulvhi4, *mulvhi4_1): New define_insns.
>
> * gcc.target/i386/pr66112-2.c: New test.
>
> --- gcc/config/i386/i386.md.jj 2015-05-11 09:08:21.000000000 +0200
> +++ gcc/config/i386/i386.md 2015-05-12 11:26:55.642794479 +0200
> @@ -6602,14 +6602,14 @@
> [(parallel [(set (reg:CCO FLAGS_REG)
> (eq:CCO (mult:<DWI>
> (sign_extend:<DWI>
> - (match_operand:SWI48 1 "register_operand"))
> + (match_operand:SWI248 1 "register_operand"))
> (match_dup 4))
> (sign_extend:<DWI>
> - (mult:SWI48 (match_dup 1)
> - (match_operand:SWI48 2
> - "<general_operand>")))))
> - (set (match_operand:SWI48 0 "register_operand")
> - (mult:SWI48 (match_dup 1) (match_dup 2)))])
> + (mult:SWI248 (match_dup 1)
> + (match_operand:SWI248 2
> + "<general_operand>")))))
> + (set (match_operand:SWI248 0 "register_operand")
> + (mult:SWI248 (match_dup 1) (match_dup 2)))])
> (set (pc) (if_then_else
> (eq (reg:CCO FLAGS_REG) (const_int 0))
> (label_ref (match_operand 3))
> @@ -6665,16 +6665,14 @@
> (match_operand:<DWI> 3 "const_int_operand" "K,i"))
> (sign_extend:<DWI>
> (mult:SWI48 (match_dup 1)
> - (match_operand:SWI 2 "x86_64_immediate_operand"
> - "K,<i>")))))
> + (match_operand:SWI48 2
> + "x86_64_immediate_operand" "K,<i>")))))
> (set (match_operand:SWI48 0 "register_operand" "=r,r")
> (mult:SWI48 (match_dup 1) (match_dup 2)))]
> "!(MEM_P (operands[1]) && MEM_P (operands[2]))
> && CONST_INT_P (operands[2])
> && INTVAL (operands[2]) == INTVAL (operands[3])"
> - "@
> - imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
> - imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> + "imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> [(set_attr "type" "imul")
> (set (attr "athlon_decode")
> (cond [(eq_attr "cpu" "athlon")
> @@ -6689,26 +6687,78 @@
> (set_attr "bdver1_decode" "direct")
> (set_attr "mode" "<MODE>")
> (set (attr "length_immediate")
> - (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
> + (cond [(eq_attr "alternative" "0")
> (const_string "1")
> (match_test "<MODE_SIZE> == 8")
> (const_string "4")]
> (const_string "<MODE_SIZE>")))])
>
> +(define_insn "*mulvhi4"
> + [(set (reg:CCO FLAGS_REG)
> + (eq:CCO (mult:SI
> + (sign_extend:SI
> + (match_operand:HI 1 "nonimmediate_operand" "0"))
> + (sign_extend:SI
> + (match_operand:HI 2 "general_operand" "mr")))
> + (sign_extend:SI
> + (mult:HI (match_dup 1) (match_dup 2)))))
> + (set (match_operand:HI 0 "register_operand" "=r")
> + (mult:HI (match_dup 1) (match_dup 2)))]
> + "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
> + "imul{w}\t{%2, %0|%0, %2}"
> + [(set_attr "type" "imul")
> + (set_attr "prefix_0f" "1")
> + (set_attr "athlon_decode" "vector")
> + (set_attr "amdfam10_decode" "direct")
> + (set_attr "bdver1_decode" "double")
> + (set_attr "mode" "HI")])
> +
> +(define_insn "*mulvhi4_1"
> + [(set (reg:CCO FLAGS_REG)
> + (eq:CCO (mult:SI
> + (sign_extend:SI
> + (match_operand:HI 1 "nonimmediate_operand" "rm,rm"))
> + (match_operand:SI 3 "const_int_operand" "K,i"))
> + (sign_extend:SI
> + (mult:HI (match_dup 1)
> + (match_operand:HI 2
> + "x86_64_immediate_operand" "K,n")))))
> + (set (match_operand:HI 0 "register_operand" "=r,r")
> + (mult:HI (match_dup 1) (match_dup 2)))]
> + "!(MEM_P (operands[1]) && MEM_P (operands[2]))
> + && CONST_INT_P (operands[2])
> + && INTVAL (operands[2]) == INTVAL (operands[3])"
> + "imul{w}\t{%2, %1, %0|%0, %1, %2}"
> + [(set_attr "type" "imul")
> + (set_attr "prefix_0f" "0")
> + (set (attr "athlon_decode")
> + (cond [(eq_attr "cpu" "athlon")
> + (const_string "vector")
> + (eq_attr "alternative" "1")
> + (const_string "vector")]
> + (const_string "direct")))
> + (set_attr "amdfam10_decode" "vector")
> + (set_attr "bdver1_decode" "double")
> + (set_attr "mode" "HI")
> + (set (attr "length_immediate")
> + (cond [(eq_attr "alternative" "0")
> + (const_string "1")]
> + (const_string "2")))])
> +
> (define_expand "umulv<mode>4"
> [(parallel [(set (reg:CCO FLAGS_REG)
> (eq:CCO (mult:<DWI>
> (zero_extend:<DWI>
> - (match_operand:SWI48 1
> + (match_operand:SWI248 1
> "nonimmediate_operand"))
> (zero_extend:<DWI>
> - (match_operand:SWI48 2
> + (match_operand:SWI248 2
> "nonimmediate_operand")))
> (zero_extend:<DWI>
> - (mult:SWI48 (match_dup 1) (match_dup 2)))))
> - (set (match_operand:SWI48 0 "register_operand")
> - (mult:SWI48 (match_dup 1) (match_dup 2)))
> - (clobber (match_scratch:SWI48 4))])
> + (mult:SWI248 (match_dup 1) (match_dup 2)))))
> + (set (match_operand:SWI248 0 "register_operand")
> + (mult:SWI248 (match_dup 1) (match_dup 2)))
> + (clobber (match_scratch:SWI248 4))])
> (set (pc) (if_then_else
> (eq (reg:CCO FLAGS_REG) (const_int 0))
> (label_ref (match_operand 3))
> @@ -6723,14 +6773,14 @@
> [(set (reg:CCO FLAGS_REG)
> (eq:CCO (mult:<DWI>
> (zero_extend:<DWI>
> - (match_operand:SWI48 1 "nonimmediate_operand" "%0"))
> + (match_operand:SWI248 1 "nonimmediate_operand" "%0"))
> (zero_extend:<DWI>
> - (match_operand:SWI48 2 "nonimmediate_operand" "rm")))
> + (match_operand:SWI248 2 "nonimmediate_operand" "rm")))
> (zero_extend:<DWI>
> - (mult:SWI48 (match_dup 1) (match_dup 2)))))
> - (set (match_operand:SWI48 0 "register_operand" "=a")
> - (mult:SWI48 (match_dup 1) (match_dup 2)))
> - (clobber (match_scratch:SWI48 3 "=d"))]
> + (mult:SWI248 (match_dup 1) (match_dup 2)))))
> + (set (match_operand:SWI248 0 "register_operand" "=a")
> + (mult:SWI248 (match_dup 1) (match_dup 2)))
> + (clobber (match_scratch:SWI248 3 "=d"))]
> "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
> "mul{<imodesuffix>}\t%2"
> [(set_attr "type" "imul")
> --- gcc/testsuite/gcc.target/i386/pr66112-2.c.jj 2015-05-12 10:46:18.565328732 +0200
> +++ gcc/testsuite/gcc.target/i386/pr66112-2.c 2015-05-12 10:50:16.203437790 +0200
> @@ -0,0 +1,29 @@
> +/* PR target/66112 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +unsigned short int
> +foo (int a, int b)
> +{
> + unsigned short int res;
> + a &= 0xffff;
> + b &= 0xffff;
> + if (__builtin_mul_overflow (a, b, &res))
> + res = 0x123;
> + return res;
> +}
> +
> +short int
> +bar (int a, int b)
> +{
> + short int res;
> + a = (short int) a;
> + b = (short int) b;
> + if (__builtin_mul_overflow (a, b, &res))
> + res = 0x123;
> + return res;
> +}
> +
> +/* { dg-final { scan-assembler-times "jn?o\[ \t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "mulw\[ \t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "imulw\[ \t\]" 1 } } */
>
> Jakub