[aarch64] Fix target/69176

Richard Earnshaw (lists) Richard.Earnshaw@arm.com
Mon Jan 18 10:04:00 GMT 2016


> +(define_constraint "Upl"
> +  "A constraint that matches two uses of add instructions."

That's not a particularly helpful description for external users of the
compiler.  I think the description either needs to be sufficiently
precise that people who understand the ISA but not the guts of GCC can
use it, or it should be marked @internal.
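
The latter is the one-line fix; an untested sketch, reusing your wording
and the constraint body from the patch as posted:

  (define_constraint "Upl"
    "@internal
     A constraint that matches two uses of add instructions."
    (and (match_code "const_int")
         (match_test "aarch64_pluslong_strict_immedate (op, VOIDmode)")))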

Otherwise OK.

R.

On 15/01/16 21:36, Richard Henderson wrote:
> See the PR for details, but basically, the plus operations are special,
> so you can't just split one of the alternatives out into a different
> pattern.
> 
> This merges the two-instruction add case back into the main plus pattern, and
> then adds peepholes and splitters to generate the same code as before.
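> 
> For concreteness (illustrative constant): 0x123456 is too wide for the
> 12-bit, optionally shifted, add immediate field, so after reload the
> new splitter emits the same pair the old pattern did:
> 
>   add  x0, x1, #0x123000   // i - (i & 0xfff)
>   add  x0, x0, #0x456      // i & 0xfff
> 
> When a scratch register is free and the constant happens to be a valid
> single-insn move immediate, the new peephole instead loads it into the
> scratch and emits one register-register add.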
> 
> Ok?
> 
> 
> r~
> 
> 
> d-69176
> 
> 
> 	* config/aarch64/aarch64.md (add<GPI>3): Move long immediate
> 	operands to pseudo only if CSE is expected.  Split long immediate
> 	operands only after reload, and for the stack pointer.
> 	(*add<GPI>3_pluslong): Remove.
> 	(*addsi3_aarch64, *adddi3_aarch64): Merge into...
> 	(*add<GPI>3_aarch64): ... here.  Add r/rk/Upl alternative.
> 	(*addsi3_aarch64_uxtw): Add r/rk/Upl alternative.
> 	(*add<GPI>3 peepholes): New.
> 	(*add<GPI>3 splitters): New.
> 	* config/aarch64/constraints.md (Upl): New.
> 	* config/aarch64/predicates.md (aarch64_pluslong_strict_immedate): New.
> 
> 
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index f6c8eb1..bde231b 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1590,96 +1590,120 @@
>      (plus:GPI (match_operand:GPI 1 "register_operand" "")
>  	      (match_operand:GPI 2 "aarch64_pluslong_operand" "")))]
>    ""
> -  "
> -  if (!aarch64_plus_operand (operands[2], VOIDmode))
> +{
> +  if (aarch64_pluslong_strict_immedate (operands[2], <MODE>mode))
>      {
> -      if (can_create_pseudo_p ())
> -	{
> -	  rtx tmp = gen_reg_rtx (<MODE>mode);
> -	  emit_move_insn (tmp, operands[2]);
> -	  operands[2] = tmp;
> -	}
> -      else
> +      /* Give CSE the opportunity to share this constant across additions.  */
> +      if (!cse_not_expected && can_create_pseudo_p ())
> +        operands[2] = force_reg (<MODE>mode, operands[2]);
> +
> +      /* Split will refuse to operate on a modification to the stack pointer.
> +	 Aid the prologue and epilogue expanders by splitting this now.  */
> +      else if (reload_completed && operands[0] == stack_pointer_rtx)
>  	{
> -	  HOST_WIDE_INT imm = INTVAL (operands[2]);
> -	  imm = imm >= 0 ? imm & 0xfff : -(-imm & 0xfff);
> -	  emit_insn (gen_add<mode>3 (operands[0], operands[1],
> -				     GEN_INT (INTVAL (operands[2]) - imm)));
> +	  HOST_WIDE_INT i = INTVAL (operands[2]);
> +	  HOST_WIDE_INT s = (i >= 0 ? i & 0xfff : -(-i & 0xfff));
> +	  emit_insn (gen_rtx_SET (operands[0],
> +				  gen_rtx_PLUS (<MODE>mode, operands[1],
> +						GEN_INT (i - s))));
>  	  operands[1] = operands[0];
> -	  operands[2] = GEN_INT (imm);
> +	  operands[2] = GEN_INT (s);
>  	}
>      }
> -  "
> -)
> -
> -;; Find add with a 2-instruction immediate and merge into 2 add instructions.
> -
> -(define_insn_and_split "*add<mode>3_pluslong"
> -  [(set
> -    (match_operand:GPI 0 "register_operand" "=r")
> -    (plus:GPI (match_operand:GPI 1 "register_operand" "r")
> -	      (match_operand:GPI 2 "aarch64_pluslong_immediate" "i")))]
> -  "!aarch64_plus_operand (operands[2], VOIDmode)
> -   && !aarch64_move_imm (INTVAL (operands[2]), <MODE>mode)"
> -  "#"
> -  "&& true"
> -  [(set (match_dup 0) (plus:GPI (match_dup 1) (match_dup 3)))
> -   (set (match_dup 0) (plus:GPI (match_dup 0) (match_dup 4)))]
> -  "
> -    {
> -      HOST_WIDE_INT imm = INTVAL (operands[2]);
> -      imm = imm >= 0 ? imm & 0xfff : -(-imm & 0xfff);
> -      operands[3] = GEN_INT (INTVAL (operands[2]) - imm);
> -      operands[4] = GEN_INT (imm);
> -    }
> -  "
> -)
> +})
>  
> -(define_insn "*addsi3_aarch64"
> +(define_insn "*add<mode>3_aarch64"
>    [(set
> -    (match_operand:SI 0 "register_operand" "=rk,rk,w,rk")
> -    (plus:SI
> -     (match_operand:SI 1 "register_operand" "%rk,rk,w,rk")
> -     (match_operand:SI 2 "aarch64_plus_operand" "I,r,w,J")))]
> +    (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r")
> +    (plus:GPI
> +     (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk")
> +     (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Upl")))]
>    ""
>    "@
> -  add\\t%w0, %w1, %2
> -  add\\t%w0, %w1, %w2
> -  add\\t%0.2s, %1.2s, %2.2s
> -  sub\\t%w0, %w1, #%n2"
> -  [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm")
> -   (set_attr "simd" "*,*,yes,*")]
> +  add\\t%<w>0, %<w>1, %2
> +  add\\t%<w>0, %<w>1, %<w>2
> +  add\\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>
> +  sub\\t%<w>0, %<w>1, #%n2
> +  #"
> +  [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple")
> +   (set_attr "simd" "*,*,yes,*,*")]
>  )
>  
>  ;; zero_extend version of above
>  (define_insn "*addsi3_aarch64_uxtw"
>    [(set
> -    (match_operand:DI 0 "register_operand" "=rk,rk,rk")
> +    (match_operand:DI 0 "register_operand" "=rk,rk,rk,r")
>      (zero_extend:DI
> -     (plus:SI (match_operand:SI 1 "register_operand" "%rk,rk,rk")
> -              (match_operand:SI 2 "aarch64_plus_operand" "I,r,J"))))]
> +     (plus:SI (match_operand:SI 1 "register_operand" "%rk,rk,rk,rk")
> +              (match_operand:SI 2 "aarch64_pluslong_operand" "I,r,J,Upl"))))]
>    ""
>    "@
>    add\\t%w0, %w1, %2
>    add\\t%w0, %w1, %w2
> -  sub\\t%w0, %w1, #%n2"
> -  [(set_attr "type" "alu_imm,alu_sreg,alu_imm")]
> +  sub\\t%w0, %w1, #%n2
> +  #"
> +  [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple")]
>  )
>  
> -(define_insn "*adddi3_aarch64"
> -  [(set
> -    (match_operand:DI 0 "register_operand" "=rk,rk,rk,w")
> -    (plus:DI
> -     (match_operand:DI 1 "register_operand" "%rk,rk,rk,w")
> -     (match_operand:DI 2 "aarch64_plus_operand" "I,r,J,w")))]
> -  ""
> -  "@
> -  add\\t%x0, %x1, %2
> -  add\\t%x0, %x1, %x2
> -  sub\\t%x0, %x1, #%n2
> -  add\\t%d0, %d1, %d2"
> -  [(set_attr "type" "alu_imm,alu_sreg,alu_imm,neon_add")
> -   (set_attr "simd" "*,*,*,yes")]
> +;; If there's a free register, and we can load the constant with a
> +;; single instruction, do so.  This has a chance to improve scheduling.
> +(define_peephole2
> +  [(match_scratch:GPI 3 "r")
> +   (set (match_operand:GPI 0 "register_operand")
> +	(plus:GPI
> +	  (match_operand:GPI 1 "register_operand")
> +	  (match_operand:GPI 2 "aarch64_pluslong_strict_immedate")))]
> +  "aarch64_move_imm (INTVAL (operands[2]), <MODE>mode)"
> +  [(set (match_dup 3) (match_dup 2))
> +   (set (match_dup 0) (plus:GPI (match_dup 1) (match_dup 3)))]
> +)
> +
> +(define_peephole2
> +  [(match_scratch:SI 3 "r")
> +   (set (match_operand:DI 0 "register_operand")
> +	(zero_extend:DI
> +	  (plus:SI
> +	    (match_operand:SI 1 "register_operand")
> +	    (match_operand:SI 2 "aarch64_pluslong_strict_immedate"))))]
> +  "aarch64_move_imm (INTVAL (operands[2]), SImode)"
> +  [(set (match_dup 3) (match_dup 2))
> +   (set (match_dup 0) (zero_extend:DI (plus:SI (match_dup 1) (match_dup 3))))]
> +)
> +
> +;; After peephole2 has had a chance to run, split any remaining long
> +;; additions into two add immediates.
> +(define_split
> +  [(set (match_operand:GPI 0 "register_operand")
> +	(plus:GPI
> +	  (match_operand:GPI 1 "register_operand")
> +	  (match_operand:GPI 2 "aarch64_pluslong_strict_immedate")))]
> +  "epilogue_completed"
> +  [(set (match_dup 0) (plus:GPI (match_dup 1) (match_dup 3)))
> +   (set (match_dup 0) (plus:GPI (match_dup 0) (match_dup 4)))]
> +  {
> +    HOST_WIDE_INT i = INTVAL (operands[2]);
> +    HOST_WIDE_INT s = (i >= 0 ? i & 0xfff : -(-i & 0xfff));
> +    operands[3] = GEN_INT (i - s);
> +    operands[4] = GEN_INT (s);
> +  }
> +)
> +
> +(define_split
> +  [(set (match_operand:DI 0 "register_operand")
> +	(zero_extend:DI
> +	  (plus:SI
> +	    (match_operand:SI 1 "register_operand")
> +	    (match_operand:SI 2 "aarch64_pluslong_strict_immedate"))))]
> +  "epilogue_completed"
> +  [(set (match_dup 5) (plus:SI (match_dup 1) (match_dup 3)))
> +   (set (match_dup 0) (zero_extend:DI (plus:SI (match_dup 5) (match_dup 4))))]
> +  {
> +    HOST_WIDE_INT i = INTVAL (operands[2]);
> +    HOST_WIDE_INT s = (i >= 0 ? i & 0xfff : -(-i & 0xfff));
> +    operands[3] = GEN_INT (i - s);
> +    operands[4] = GEN_INT (s);
> +    operands[5] = gen_lowpart (SImode, operands[0]);
> +  }
>  )
>  
>  (define_expand "addti3"
> diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
> index 9b77291..0208b25 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -35,6 +35,11 @@
>   (and (match_code "const_int")
>        (match_test "aarch64_uimm12_shift (ival)")))
>  
> +(define_constraint "Upl"
> +  "A constraint that matches two uses of add instructions."
> +  (and (match_code "const_int")
> +       (match_test "aarch64_pluslong_strict_immedate (op, VOIDmode)")))
> +
>  (define_constraint "J"
>   "A constant that can be used with a SUB operation (once negated)."
>   (and (match_code "const_int")
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index a2eb69c..f3b514b 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -107,6 +107,10 @@
>    (and (match_code "const_int")
>         (match_test "(INTVAL (op) < 0xffffff && INTVAL (op) > -0xffffff)")))
>  
> +(define_predicate "aarch64_pluslong_strict_immedate"
> +  (and (match_operand 0 "aarch64_pluslong_immediate")
> +       (not (match_operand 0 "aarch64_plus_immediate"))))
> +
>  (define_predicate "aarch64_pluslong_operand"
>    (ior (match_operand 0 "register_operand")
>         (match_operand 0 "aarch64_pluslong_immediate")))
> 
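
P.S. for anyone reading along: the low-part computation shared by the
expander and both splitters rounds towards zero, so a negative offset
splits cleanly into a sub/sub pair.  A stand-alone C check of the
arithmetic, with made-up values:

  #include <assert.h>

  /* Mirrors the immediate split in add<mode>3 and the splitters:
     peel off the low 12 bits, rounding towards zero.  */
  static long
  low_part (long i)
  {
    return i >= 0 ? (i & 0xfff) : -(-i & 0xfff);
  }

  int
  main (void)
  {
    long i = 0x123456, s = low_part (i);
    assert (s == 0x456 && i - s == 0x123000);   /* add/add pair */

    i = -0x123456, s = low_part (i);
    assert (s == -0x456 && i - s == -0x123000); /* sub/sub pair */
    return 0;
  }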


