[PATCH][GCC][AArch64] optimize float immediate moves (2/4) - HF/DF/SF mode.

Tamar Christina Tamar.Christina@arm.com
Fri Jun 16 08:42:00 GMT 2017


> 
> It doesn't need to, because all HImode CONST_INTs are already legitimate.
> You can just use emit_move_insn instead.
> 

Ah right, that's true.
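
For reference, that means the HFmode case can go from the current "subreg
dance" (sketch only; ival here is the bit pattern returned by
aarch64_reinterpret_float_as_int, as in the splitters below):

  /* Current expansion: build the constant in an SImode pseudo, then
     narrow it to HImode before reinterpreting it as HF.  */
  rtx tmp = gen_reg_rtx (SImode);
  aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
  tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
  emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));

to the direct form, since the HImode CONST_INT is already a legitimate
move source:

  /* Simplified expansion: emit_move_insn handles the HImode constant
     directly, so no intermediate SImode pseudo or subreg is needed.  */
  rtx tmp = gen_reg_rtx (HImode);
  emit_move_insn (tmp, gen_int_mode (ival, HImode));
  emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));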

> FWIW, the following seems to pass the same tests and avoids the subreg
> dance.  Just a proof of concept, and I'm not attached to the new iterator
> name.

Ah, thanks! That is a bit simpler. I'll take a similar approach.
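
Concretely, for DFmode the generic split below should reduce to roughly the
following (assuming <FCVT_TARGET> maps DF to DI, as in the existing
conversion patterns):

  unsigned HOST_WIDE_INT ival;
  if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
    FAIL;

  /* Materialise the bit pattern in a DImode pseudo, then reinterpret
     it as DF via a lowpart.  */
  rtx tmp = gen_reg_rtx (DImode);
  emit_move_insn (tmp, gen_int_mode (ival, DImode));
  emit_move_insn (operands[0], gen_lowpart (DFmode, tmp));
  DONE;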

> Thanks,
> Richard
> 
> 
> Index: gcc/gcc/config/aarch64/aarch64.md
> ===================================================================
> --- gcc.orig/gcc/config/aarch64/aarch64.md
> +++ gcc/gcc/config/aarch64/aarch64.md
> @@ -1063,7 +1063,28 @@
>    }
>  )
> 
> -(define_insn_and_split "*movhf_aarch64"
> +(define_split
> +  [(set (match_operand:GPF_MOV_F16 0 "nonimmediate_operand")
> +        (match_operand:GPF_MOV_F16 1 "immediate_operand"))]
> +  "TARGET_FLOAT
> +   && can_create_pseudo_p ()
> +   && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
> +   && !aarch64_float_const_representable_p (operands[1])
> +   && aarch64_float_const_rtx_p (operands[1])"
> +  [(const_int 0)]
> +  {
> +    unsigned HOST_WIDE_INT ival;
> +    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> +      FAIL;
> +
> +    rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode);
> +    emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode));
> +    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
> +    DONE;
> +  }
> +)
> +
> +(define_insn "*movhf_aarch64"
>    [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w  ,w,m,r,m ,r")
>  	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
>    "TARGET_FLOAT && (register_operand (operands[0], HFmode) @@ -
> 1080,28 +1101,12 @@
>     ldrh\\t%w0, %1
>     strh\\t%w1, %0
>     mov\\t%w0, %w1"
> -  "&& can_create_pseudo_p ()
> -   && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
> -   && !aarch64_float_const_representable_p (operands[1])
> -   &&  aarch64_float_const_rtx_p (operands[1])"
> -  [(const_int 0)]
> -  "{
> -    unsigned HOST_WIDE_INT ival;
> -    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> -      FAIL;
> -
> -    rtx tmp = gen_reg_rtx (SImode);
> -    aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> -    tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> -    emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> -    DONE;
> -  }"
>    [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
>  		     neon_move,f_loads,f_stores,load1,store1,mov_reg")
>     (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
>  )
> 
> -(define_insn_and_split "*movsf_aarch64"
> +(define_insn "*movsf_aarch64"
>    [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w  ,w,m,r,m ,r,r")
>  	(match_operand:SF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
>    "TARGET_FLOAT && (register_operand (operands[0], SFmode) @@ -
> 1119,28 +1124,13 @@
>     str\\t%w1, %0
>     mov\\t%w0, %w1
>     mov\\t%w0, %1"
> -  "&& can_create_pseudo_p ()
> -   && !aarch64_can_const_movi_rtx_p (operands[1], SFmode)
> -   && !aarch64_float_const_representable_p (operands[1])
> -   &&  aarch64_float_const_rtx_p (operands[1])"
> -  [(const_int 0)]
> -  "{
> -    unsigned HOST_WIDE_INT ival;
> -    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> -      FAIL;
> -
> -    rtx tmp = gen_reg_rtx (SImode);
> -    aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> -    emit_move_insn (operands[0], gen_lowpart (SFmode, tmp));
> -    DONE;
> -  }"
>    [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
>  		     f_loads,f_stores,load1,store1,mov_reg,\
>  		     fconsts")
>     (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
>  )
> 
> -(define_insn_and_split "*movdf_aarch64"
> +(define_insn "*movdf_aarch64"
>    [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w  ,?r,w,w  ,w  ,w,m,r,m ,r,r")
>  	(match_operand:DF 1 "general_operand"      "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
>    "TARGET_FLOAT && (register_operand (operands[0], DFmode) @@ -
> 1158,21 +1148,6 @@
>     str\\t%x1, %0
>     mov\\t%x0, %x1
>     mov\\t%x0, %1"
> -  "&& can_create_pseudo_p ()
> -   && !aarch64_can_const_movi_rtx_p (operands[1], DFmode)
> -   && !aarch64_float_const_representable_p (operands[1])
> -   &&  aarch64_float_const_rtx_p (operands[1])"
> -  [(const_int 0)]
> -  "{
> -    unsigned HOST_WIDE_INT ival;
> -    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> -      FAIL;
> -
> -    rtx tmp = gen_reg_rtx (DImode);
> -    aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> -    emit_move_insn (operands[0], gen_lowpart (DFmode, tmp));
> -    DONE;
> -  }"
>    [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
>  		     f_loadd,f_stored,load1,store1,mov_reg,\
>  		     fconstd")
> Index: gcc/gcc/config/aarch64/iterators.md
> ===================================================================
> --- gcc.orig/gcc/config/aarch64/iterators.md
> +++ gcc/gcc/config/aarch64/iterators.md
> @@ -44,6 +44,10 @@
>  ;; Iterator for all scalar floating point modes (HF, SF, DF)
>  (define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF])
> 
> +;; Iterator for all scalar floating point modes (HF, SF, DF), without
> +;; requiring AARCH64_ISA_F16 for HF.
> +(define_mode_iterator GPF_MOV_F16 [HF SF DF])
> +
>  ;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
>  (define_mode_iterator GPF_TF_F16 [HF SF DF TF])
> 


