[PATCH][ARM] Remove remaining Neon DImode support

Kyrill Tkachov kyrylo.tkachov@foss.arm.com
Thu Aug 22 13:46:00 GMT 2019


Hi Wilco,

On 7/22/19 5:18 PM, Wilco Dijkstra wrote:
> Remove the remaining Neon adddi3, subdi3 and negdi2 patterns.  As a result,
> adddi3, subdi3 and negdi2 can now always be expanded early, irrespective of
> whether Neon is available.  Also expand the extenddi patterns at the same
> time.  Several Neon arch attributes are no longer used and are removed.
>
> Code generation is improved in all cases, saving another 400-500 instructions
> from the PR77308 testcase (the total improvement is over 1700 instructions
> with -mcpu=cortex-a57 -O2).
>
> Bootstrap & regress OK on arm-none-linux-gnueabihf --with-cpu=cortex-a57
>
Ok.

Thanks,

Kyrill

> ChangeLog:
> 2019-07-19  Wilco Dijkstra  <wdijkstr@arm.com>
>
> * config/arm/arm.md (neon_for_64bits): Remove.
> (avoid_neon_for_64bits): Remove.
> (arm_adddi3): Always split early.
> (arm_subdi3): Always split early.
> (negdi2): Remove Neon expansion.
> (split zero_extend): Split before reload.
> (split sign_extend): Split before reload.
> ---
>
> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
> index 10ed70dac4384354c0a2453c5e51a29108c6c062..6d8a5a54997caee0e6956f01018cb5300a9a07e1 100644
> --- a/gcc/config/arm/arm.md
> +++ b/gcc/config/arm/arm.md
> @@ -125,7 +125,7 @@ (define_attr "length" ""
>  ; arm_arch6.  "v6t2" for Thumb-2 with arm_arch6 and "v8mb" for ARMv8-M
>  ; Baseline.  This attribute is used to compute attribute "enabled",
>  ; use type "any" to enable an alternative in all cases.
> -(define_attr "arch" 
> "any,a,t,32,t1,t2,v6,nov6,v6t2,v8mb,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3,neon"
> +(define_attr "arch" 
> "any,a,t,32,t1,t2,v6,nov6,v6t2,v8mb,iwmmxt,iwmmxt2,armv6_or_vfpv3,neon"
>    (const_string "any"))
>
>  (define_attr "arch_enabled" "no,yes"
> @@ -168,16 +168,6 @@ (define_attr "arch_enabled" "no,yes"
>        (match_test "TARGET_THUMB1 && arm_arch8"))
>   (const_string "yes")
>
> - (and (eq_attr "arch" "avoid_neon_for_64bits")
> -      (match_test "TARGET_NEON")
> -      (not (match_test "TARGET_PREFER_NEON_64BITS")))
> - (const_string "yes")
> -
> - (and (eq_attr "arch" "neon_for_64bits")
> -      (match_test "TARGET_NEON")
> -      (match_test "TARGET_PREFER_NEON_64BITS"))
> - (const_string "yes")
> -
>   (and (eq_attr "arch" "iwmmxt2")
>        (match_test "TARGET_REALLY_IWMMXT2"))
>   (const_string "yes")
> @@ -450,13 +440,8 @@ (define_expand "adddi3"
>      (clobber (reg:CC CC_REGNUM))])]
>    "TARGET_EITHER"
>    "
> -  if (TARGET_THUMB1)
> -    {
> -      if (!REG_P (operands[1]))
> -        operands[1] = force_reg (DImode, operands[1]);
> -      if (!REG_P (operands[2]))
> -        operands[2] = force_reg (DImode, operands[2]);
> -     }
> +  if (TARGET_THUMB1 && !REG_P (operands[2]))
> +    operands[2] = force_reg (DImode, operands[2]);
>    "
>  )
>
> @@ -465,9 +450,9 @@ (define_insn_and_split "*arm_adddi3"
>  (plus:DI (match_operand:DI 1 "arm_general_register_operand" "%0, 0, 
> r, 0, r")
>   (match_operand:DI 2 "arm_general_adddi_operand" "r,  0, r, Dd, Dd")))
>     (clobber (reg:CC CC_REGNUM))]
> -  "TARGET_32BIT && !TARGET_NEON"
> +  "TARGET_32BIT"
>    "#"
> -  "TARGET_32BIT && ((!TARGET_NEON && !TARGET_IWMMXT) || 
> reload_completed)"
> +  "TARGET_32BIT"
>    [(parallel [(set (reg:CC_C CC_REGNUM)
>     (compare:CC_C (plus:SI (match_dup 1) (match_dup 2))
>   (match_dup 1)))
> @@ -1290,24 +1275,16 @@ (define_expand "subdi3"
>      (clobber (reg:CC CC_REGNUM))])]
>    "TARGET_EITHER"
>    "
> -  if (TARGET_THUMB1)
> -    {
> -      if (!REG_P (operands[1]))
> -        operands[1] = force_reg (DImode, operands[1]);
> -      if (!REG_P (operands[2]))
> -        operands[2] = force_reg (DImode, operands[2]);
> -     }
> -  "
> -)
> +")
>
>  (define_insn_and_split "*arm_subdi3"
>    [(set (match_operand:DI           0 "arm_general_register_operand" 
> "=&r,&r,&r")
>  (minus:DI (match_operand:DI 1 "arm_general_register_operand" "0,r,0")
>    (match_operand:DI 2 "arm_general_register_operand" "r,0,0")))
>     (clobber (reg:CC CC_REGNUM))]
> -  "TARGET_32BIT && !TARGET_NEON"
> +  "TARGET_32BIT"
>    "#"  ; "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"
> -  "&& (!TARGET_IWMMXT || reload_completed)"
> +  "TARGET_32BIT"
>    [(parallel [(set (reg:CC CC_REGNUM)
>     (compare:CC (match_dup 1) (match_dup 2)))
>        (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
> @@ -4164,13 +4141,6 @@ (define_expand "negdi2"
>   (neg:DI (match_operand:DI 1 "s_register_operand")))
>      (clobber (reg:CC CC_REGNUM))])]
>    "TARGET_EITHER"
> -  {
> -    if (TARGET_NEON)
> -      {
> -        emit_insn (gen_negdi2_neon (operands[0], operands[1]));
> -DONE;
> -      }
> -  }
>  )
>
>  ;; The constraints here are to prevent a *partial* overlap (where %Q0 == %R1).
> @@ -4182,7 +4152,7 @@ (define_insn_and_split "*negdi2_insn"
>    "TARGET_32BIT"
>    "#"; rsbs %Q0, %Q1, #0; rsc %R0, %R1, #0       (ARM)
>  ; negs %Q0, %Q1    ; sbc %R0, %R1, %R1, lsl #1 (Thumb-2)
> -  "&& reload_completed"
> +  "TARGET_32BIT"
>    [(parallel [(set (reg:CC CC_REGNUM)
>     (compare:CC (const_int 0) (match_dup 1)))
>        (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))])
> @@ -4714,25 +4684,17 @@ (define_insn "extend<mode>di2"
>  (define_split
>    [(set (match_operand:DI 0 "s_register_operand" "")
>          (zero_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
> -  "TARGET_32BIT && reload_completed && !IS_VFP_REGNUM (REGNO 
> (operands[0]))"
> +  "TARGET_32BIT"
>    [(set (match_dup 0) (match_dup 1))]
>  {
>    rtx lo_part = gen_lowpart (SImode, operands[0]);
>    machine_mode src_mode = GET_MODE (operands[1]);
>
> -  if (REG_P (operands[0])
> -      && !reg_overlap_mentioned_p (operands[0], operands[1]))
> -    emit_clobber (operands[0]);
> -  if (!REG_P (lo_part) || src_mode != SImode
> -      || !rtx_equal_p (lo_part, operands[1]))
> -    {
> -      if (src_mode == SImode)
> -        emit_move_insn (lo_part, operands[1]);
> -      else
> -        emit_insn (gen_rtx_SET (lo_part,
> -gen_rtx_ZERO_EXTEND (SImode, operands[1])));
> -      operands[1] = lo_part;
> -    }
> +  if (src_mode == SImode)
> +    emit_move_insn (lo_part, operands[1]);
> +  else
> +    emit_insn (gen_rtx_SET (lo_part,
> +    gen_rtx_ZERO_EXTEND (SImode, operands[1])));
>    operands[0] = gen_highpart (SImode, operands[0]);
>    operands[1] = const0_rtx;
>  })
> @@ -4740,26 +4702,18 @@ (define_split
>  (define_split
>    [(set (match_operand:DI 0 "s_register_operand" "")
>          (sign_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
> -  "TARGET_32BIT && reload_completed && !IS_VFP_REGNUM (REGNO 
> (operands[0]))"
> +  "TARGET_32BIT"
>    [(set (match_dup 0) (ashiftrt:SI (match_dup 1) (const_int 31)))]
>  {
>    rtx lo_part = gen_lowpart (SImode, operands[0]);
>    machine_mode src_mode = GET_MODE (operands[1]);
>
> -  if (REG_P (operands[0])
> -      && !reg_overlap_mentioned_p (operands[0], operands[1]))
> -    emit_clobber (operands[0]);
> -
> -  if (!REG_P (lo_part) || src_mode != SImode
> -      || !rtx_equal_p (lo_part, operands[1]))
> -    {
> -      if (src_mode == SImode)
> -        emit_move_insn (lo_part, operands[1]);
> -      else
> -        emit_insn (gen_rtx_SET (lo_part,
> -gen_rtx_SIGN_EXTEND (SImode, operands[1])));
> -      operands[1] = lo_part;
> -    }
> +  if (src_mode == SImode)
> +    emit_move_insn (lo_part, operands[1]);
> +  else
> +    emit_insn (gen_rtx_SET (lo_part,
> +    gen_rtx_SIGN_EXTEND (SImode, operands[1])));
> +  operands[1] = lo_part;
>    operands[0] = gen_highpart (SImode, operands[0]);
>  })
>
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index 757f2c0f5377148c770e061849424aed924a7d7a..0c1ee746b6ada4f83040cd1717f17bef03dc2264 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -527,32 +527,6 @@ (define_insn "add<mode>3_fp16"
>      (const_string "neon_add<q>")))]
>  )
>
> -(define_insn "adddi3_neon"
> -  [(set (match_operand:DI 0 "s_register_operand" 
> "=w,?&r,?&r,?w,?&r,?&r,?&r")
> -        (plus:DI (match_operand:DI 1 "s_register_operand" 
> "%w,0,0,w,r,0,r")
> -                 (match_operand:DI 2 "arm_adddi_operand"     
> "w,r,0,w,r,Dd,Dd")))
> -   (clobber (reg:CC CC_REGNUM))]
> -  "TARGET_NEON"
> -{
> -  switch (which_alternative)
> -    {
> -    case 0: /* fall through */
> -    case 3: return "vadd.i64\t%P0, %P1, %P2";
> -    case 1: return "#";
> -    case 2: return "#";
> -    case 4: return "#";
> -    case 5: return "#";
> -    case 6: return "#";
> -    default: gcc_unreachable ();
> -    }
> -}
> -  [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
> -     multiple,multiple,multiple")
> -   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
> -   (set_attr "length" "*,8,8,*,8,8,8")
> -   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
> -)
> -
>  (define_insn "*sub<mode>3_neon"
>    [(set (match_operand:VDQ 0 "s_register_operand" "=w")
>          (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
> @@ -587,29 +561,6 @@ (define_insn "sub<mode>3_fp16"
>   [(set_attr "type" "neon_sub<q>")]
>  )
>
> -(define_insn "subdi3_neon"
> -  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
> -        (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
> -                  (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
> -   (clobber (reg:CC CC_REGNUM))]
> -  "TARGET_NEON"
> -{
> -  switch (which_alternative)
> -    {
> -    case 0: /* fall through */
> -    case 4: return "vsub.i64\t%P0, %P1, %P2";
> -    case 1: /* fall through */
> -    case 2: /* fall through */
> -    case 3: return  "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
> -    default: gcc_unreachable ();
> -    }
> -}
> -  [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
> -   (set_attr "conds" "*,clob,clob,clob,*")
> -   (set_attr "length" "*,8,8,8,*")
> -   (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
> -)
> -
>  (define_insn "*mul<mode>3_neon"
>    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
>          (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
> @@ -886,46 +837,6 @@ (define_insn "neg<mode>2"
>                      (const_string "neon_neg<q>")))]
>  )
>
> -(define_insn "negdi2_neon"
> -  [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
> -(neg:DI (match_operand:DI 1 "s_register_operand" "  w, w,0, r")))
> -   (clobber (match_scratch:DI 2 "= X,&w,X, X"))
> -   (clobber (reg:CC CC_REGNUM))]
> -  "TARGET_NEON"
> -  "#"
> -  [(set_attr "length" "8")
> -   (set_attr "type" "multiple")]
> -)
> -
> -; Split negdi2_neon for vfp registers
> -(define_split
> -  [(set (match_operand:DI 0 "s_register_operand" "")
> -(neg:DI (match_operand:DI 1 "s_register_operand" "")))
> -   (clobber (match_scratch:DI 2 ""))
> -   (clobber (reg:CC CC_REGNUM))]
> -  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO 
> (operands[0]))"
> -  [(set (match_dup 2) (const_int 0))
> -   (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
> -      (clobber (reg:CC CC_REGNUM))])]
> -  {
> -    if (!REG_P (operands[2]))
> -      operands[2] = operands[0];
> -  }
> -)
> -
> -; Split negdi2_neon for core registers
> -(define_split
> -  [(set (match_operand:DI 0 "s_register_operand" "")
> -(neg:DI (match_operand:DI 1 "s_register_operand" "")))
> -   (clobber (match_scratch:DI 2 ""))
> -   (clobber (reg:CC CC_REGNUM))]
> -  "TARGET_32BIT && reload_completed
> -   && arm_general_register_operand (operands[0], DImode)"
> -  [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
> -      (clobber (reg:CC CC_REGNUM))])]
> -  ""
> -)
> -
>  (define_insn "<absneg_str><mode>2"
>    [(set (match_operand:VH 0 "s_register_operand" "=w")
>      (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]



More information about the Gcc-patches mailing list