[PATCH] ARM/NEON: vld1q_dup_s64 builtin
Ramana Radhakrishnan
ramana.radhakrishnan@linaro.org
Fri May 18 22:46:00 GMT 2012
On 16 May 2012 14:51, Christophe Lyon <christophe.lyon@st.com> wrote:
> On 11.05.2012 16:48, Ramana Radhakrishnan wrote:
>>
>> I would change the iterator from VQX to VQ in the pattern above (you
>> can also simplify the setting of neon_type in that case as well as
>> change that to be a vec_duplicate as below and get rid of any
>> lingering definitions of UNSPEC_VLD1_DUP if they exist), define a
>> separate pattern that expressed this as a define_insn_and_split as
>> below.
>>
>> (define_insn_and_split "neon_vld1_dupv2di"
>> [(set (match_operand:V2DI 0 "s_register_operand" "=w")
>> (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand"
>> "Um")))]
>> "TARGET_NEON"
>> "#"
>> "&& reload_completed"
>> [(const_int 0)]
>> {
>> rtx tmprtx = gen_lowpart (DImode, operands[0]);
>> emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
>> emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
>> DONE;
>> }
>> (set_attr "length" "8")
>> (set_attr "neon_type" "<fromearlierpattern>")
>> )
>>
>> Do you want to try this and see what you get ?
>
>
> Thanks for this example and suggestion, it does work.
>
>
>> I'd rather have an extra regression test in gcc.target/arm that is a
>> run-time test; e.g., take a look at gcc.target/arm/neon-vadds64.c.
>
>
> Here is an updated patch:
I tried applying your patch but ran into trouble: the patch tool
rejected it. My suspicion is that the mailer is munging whitespace in
some form. Could you please send the patch as an attachment rather than
inline in your mail?
regards,
Ramana
> 2012-05-16 Christophe Lyon <christophe.lyon@st.com>
>
> * gcc/config/arm/neon.md (neon_vld1_dup): Restrict to VQ
> operands.
> (neon_vld1_dupv2di): New, fixes vld1q_dup_s64.
> * gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c: New test.
>
> Index: gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c
> ===================================================================
> --- gcc.orig/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c (revision 0)
> +++ gcc.new/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c (revision 0)
> @@ -0,0 +1,24 @@
> +/* Test the `vld1q_dup_s64' ARM Neon intrinsic. */
> +
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_neon_hw } */
> +/* { dg-options "-O0" } */
> +/* { dg-add-options arm_neon } */
> +
> +#include "arm_neon.h"
> +#include <stdlib.h>
> +
> +int main (void)
> +{
> + int64x1_t input[2] = {(int64x1_t)0x0123456776543210LL,
> + (int64x1_t)0x89abcdeffedcba90LL};
> + int64x1_t output[2] = {0, 0};
> + int64x2_t var = vld1q_dup_s64(input);
> +
> + vst1q_s64(output, var);
> + if (output[0] != (int64x1_t)0x0123456776543210LL)
> + abort();
> + if (output[1] != (int64x1_t)0x0123456776543210LL)
> + abort();
> + return 0;
> +}
> Index: gcc/config/arm/neon.md
> ===================================================================
> --- gcc.orig/gcc/config/arm/neon.md (revision 2659)
> +++ gcc.new/gcc/config/arm/neon.md (working copy)
> @@ -4195,20 +4195,32 @@
> )
>
> (define_insn "neon_vld1_dup<mode>"
> - [(set (match_operand:VQX 0 "s_register_operand" "=w")
> - (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
> + [(set (match_operand:VQ 0 "s_register_operand" "=w")
> + (unspec:VQ [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
> UNSPEC_VLD1_DUP))]
> "TARGET_NEON"
> {
> - if (GET_MODE_NUNITS (<MODE>mode) > 2)
>
> return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
> - else
>
> - return "vld1.<V_sz_elem>\t%h0, %A1";
> }
> [(set (attr "neon_type")
> - (if_then_else (gt (const_string "<V_mode_nunits>") (const_string
> "1"))
> - (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")
> - (const_string "neon_vld1_1_2_regs")))]
> + (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))]
> +)
> +
> +(define_insn_and_split "neon_vld1_dupv2di"
> + [(set (match_operand:V2DI 0 "s_register_operand" "=w")
> + (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
> + "TARGET_NEON"
> + "#"
> + "&& reload_completed"
> + [(const_int 0)]
> + {
> + rtx tmprtx = gen_lowpart (DImode, operands[0]);
> + emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
> + emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
> + DONE;
> + }
> + [(set_attr "length" "8")
> + (set (attr "neon_type") (const_string
> "neon_vld2_2_regs_vld1_vld2_all_lanes"))]
> )
>
> (define_expand "vec_store_lanes<mode><mode>"
>
>
>
More information about the Gcc-patches
mailing list