[aarch64] Fix target/69176
Richard Earnshaw (lists)
Richard.Earnshaw@arm.com
Mon Jan 18 10:04:00 GMT 2016
> +(define_constraint "Upl"
> + "A constraint that matches two uses of add instructions."
That's not a particularly helpful description for external users of the
compiler. I think the description either needs to be sufficiently precise
that people who understand the ISA but not the guts of GCC can use the
constraint, or it should be marked @internal.
Otherwise OK.
R.
On 15/01/16 21:36, Richard Henderson wrote:
> See the PR for details, but basically, the plus operations are special so you
> can't just split out one of the alternatives to a different pattern.
>
> This merges the two-instruction add case back into the main plus pattern, and
> then adds peepholes and splitters to generate the same code as before.
>
> Ok?
>
>
> r~
>
>
> d-69176
>
>
> * config/aarch64/aarch64.md (add<GPI>3): Move long immediate
> operands to pseudo only if CSE is expected. Split long immediate
> operands only after reload, and for the stack pointer.
> (*add<GPI>3_pluslong): Remove.
> (*addsi3_aarch64, *adddi3_aarch64): Merge into...
> (*add<GPI>3_aarch64): ... here. Add r/rk/Upl alternative.
> (*addsi3_aarch64_uxtw): Add r/rk/Upl alternative.
> (*add<GPI>3 peepholes): New.
> (*add<GPI>3 splitters): New.
> * config/aarch64/constraints.md (Upl): New.
> * config/aarch64/predicates.md (aarch64_pluslong_strict_immedate): New.
>
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index f6c8eb1..bde231b 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1590,96 +1590,120 @@
> (plus:GPI (match_operand:GPI 1 "register_operand" "")
> (match_operand:GPI 2 "aarch64_pluslong_operand" "")))]
> ""
> - "
> - if (!aarch64_plus_operand (operands[2], VOIDmode))
> +{
> + if (aarch64_pluslong_strict_immedate (operands[2], <MODE>mode))
> {
> - if (can_create_pseudo_p ())
> - {
> - rtx tmp = gen_reg_rtx (<MODE>mode);
> - emit_move_insn (tmp, operands[2]);
> - operands[2] = tmp;
> - }
> - else
> + /* Give CSE the opportunity to share this constant across additions. */
> + if (!cse_not_expected && can_create_pseudo_p ())
> + operands[2] = force_reg (<MODE>mode, operands[2]);
> +
> + /* Split will refuse to operate on a modification to the stack pointer.
> + Aid the prologue and epilogue expanders by splitting this now. */
> + else if (reload_completed && operands[0] == stack_pointer_rtx)
> {
> - HOST_WIDE_INT imm = INTVAL (operands[2]);
> - imm = imm >= 0 ? imm & 0xfff : -(-imm & 0xfff);
> - emit_insn (gen_add<mode>3 (operands[0], operands[1],
> - GEN_INT (INTVAL (operands[2]) - imm)));
> + HOST_WIDE_INT i = INTVAL (operands[2]);
> + HOST_WIDE_INT s = (i >= 0 ? i & 0xfff : -(-i & 0xfff));
> + emit_insn (gen_rtx_SET (operands[0],
> + gen_rtx_PLUS (<MODE>mode, operands[1],
> + GEN_INT (i - s))));
> operands[1] = operands[0];
> - operands[2] = GEN_INT (imm);
> + operands[2] = GEN_INT (s);
> }
> }
> - "
> -)
> -
> -;; Find add with a 2-instruction immediate and merge into 2 add instructions.
> -
> -(define_insn_and_split "*add<mode>3_pluslong"
> - [(set
> - (match_operand:GPI 0 "register_operand" "=r")
> - (plus:GPI (match_operand:GPI 1 "register_operand" "r")
> - (match_operand:GPI 2 "aarch64_pluslong_immediate" "i")))]
> - "!aarch64_plus_operand (operands[2], VOIDmode)
> - && !aarch64_move_imm (INTVAL (operands[2]), <MODE>mode)"
> - "#"
> - "&& true"
> - [(set (match_dup 0) (plus:GPI (match_dup 1) (match_dup 3)))
> - (set (match_dup 0) (plus:GPI (match_dup 0) (match_dup 4)))]
> - "
> - {
> - HOST_WIDE_INT imm = INTVAL (operands[2]);
> - imm = imm >= 0 ? imm & 0xfff : -(-imm & 0xfff);
> - operands[3] = GEN_INT (INTVAL (operands[2]) - imm);
> - operands[4] = GEN_INT (imm);
> - }
> - "
> -)
> +})
>
> -(define_insn "*addsi3_aarch64"
> +(define_insn "*add<mode>3_aarch64"
> [(set
> - (match_operand:SI 0 "register_operand" "=rk,rk,w,rk")
> - (plus:SI
> - (match_operand:SI 1 "register_operand" "%rk,rk,w,rk")
> - (match_operand:SI 2 "aarch64_plus_operand" "I,r,w,J")))]
> + (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r")
> + (plus:GPI
> + (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk")
> + (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Upl")))]
> ""
> "@
> - add\\t%w0, %w1, %2
> - add\\t%w0, %w1, %w2
> - add\\t%0.2s, %1.2s, %2.2s
> - sub\\t%w0, %w1, #%n2"
> - [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm")
> - (set_attr "simd" "*,*,yes,*")]
> + add\\t%<w>0, %<w>1, %2
> + add\\t%<w>0, %<w>1, %<w>2
> + add\\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>
> + sub\\t%<w>0, %<w>1, #%n2
> + #"
> + [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple")
> + (set_attr "simd" "*,*,yes,*,*")]
> )
>
> ;; zero_extend version of above
> (define_insn "*addsi3_aarch64_uxtw"
> [(set
> - (match_operand:DI 0 "register_operand" "=rk,rk,rk")
> + (match_operand:DI 0 "register_operand" "=rk,rk,rk,r")
> (zero_extend:DI
> - (plus:SI (match_operand:SI 1 "register_operand" "%rk,rk,rk")
> - (match_operand:SI 2 "aarch64_plus_operand" "I,r,J"))))]
> + (plus:SI (match_operand:SI 1 "register_operand" "%rk,rk,rk,rk")
> + (match_operand:SI 2 "aarch64_pluslong_operand" "I,r,J,Upl"))))]
> ""
> "@
> add\\t%w0, %w1, %2
> add\\t%w0, %w1, %w2
> - sub\\t%w0, %w1, #%n2"
> - [(set_attr "type" "alu_imm,alu_sreg,alu_imm")]
> + sub\\t%w0, %w1, #%n2
> + #"
> + [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple")]
> )
>
> -(define_insn "*adddi3_aarch64"
> - [(set
> - (match_operand:DI 0 "register_operand" "=rk,rk,rk,w")
> - (plus:DI
> - (match_operand:DI 1 "register_operand" "%rk,rk,rk,w")
> - (match_operand:DI 2 "aarch64_plus_operand" "I,r,J,w")))]
> - ""
> - "@
> - add\\t%x0, %x1, %2
> - add\\t%x0, %x1, %x2
> - sub\\t%x0, %x1, #%n2
> - add\\t%d0, %d1, %d2"
> - [(set_attr "type" "alu_imm,alu_sreg,alu_imm,neon_add")
> - (set_attr "simd" "*,*,*,yes")]
> +;; If there's a free register, and we can load the constant with a
> +;; single instruction, do so. This has a chance to improve scheduling.
> +(define_peephole2
> + [(match_scratch:GPI 3 "r")
> + (set (match_operand:GPI 0 "register_operand")
> + (plus:GPI
> + (match_operand:GPI 1 "register_operand")
> + (match_operand:GPI 2 "aarch64_pluslong_strict_immedate")))]
> + "aarch64_move_imm (INTVAL (operands[2]), <MODE>mode)"
> + [(set (match_dup 3) (match_dup 2))
> + (set (match_dup 0) (plus:GPI (match_dup 1) (match_dup 3)))]
> +)
> +
> +(define_peephole2
> + [(match_scratch:SI 3 "r")
> + (set (match_operand:DI 0 "register_operand")
> + (zero_extend:DI
> + (plus:SI
> + (match_operand:SI 1 "register_operand")
> + (match_operand:SI 2 "aarch64_pluslong_strict_immedate"))))]
> + "aarch64_move_imm (INTVAL (operands[2]), SImode)"
> + [(set (match_dup 3) (match_dup 2))
> + (set (match_dup 0) (zero_extend:DI (plus:SI (match_dup 1) (match_dup 3))))]
> +)
> +
> +;; After peephole2 has had a chance to run, split any remaining long
> +;; additions into two add immediates.
> +(define_split
> + [(set (match_operand:GPI 0 "register_operand")
> + (plus:GPI
> + (match_operand:GPI 1 "register_operand")
> + (match_operand:GPI 2 "aarch64_pluslong_strict_immedate")))]
> + "epilogue_completed"
> + [(set (match_dup 0) (plus:GPI (match_dup 1) (match_dup 3)))
> + (set (match_dup 0) (plus:GPI (match_dup 0) (match_dup 4)))]
> + {
> + HOST_WIDE_INT i = INTVAL (operands[2]);
> + HOST_WIDE_INT s = (i >= 0 ? i & 0xfff : -(-i & 0xfff));
> + operands[3] = GEN_INT (i - s);
> + operands[4] = GEN_INT (s);
> + }
> +)
> +
> +(define_split
> + [(set (match_operand:DI 0 "register_operand")
> + (zero_extend:DI
> + (plus:SI
> + (match_operand:SI 1 "register_operand")
> + (match_operand:SI 2 "aarch64_pluslong_strict_immedate"))))]
> + "epilogue_completed"
> + [(set (match_dup 5) (plus:SI (match_dup 1) (match_dup 3)))
> + (set (match_dup 0) (zero_extend:DI (plus:SI (match_dup 5) (match_dup 4))))]
> + {
> + HOST_WIDE_INT i = INTVAL (operands[2]);
> + HOST_WIDE_INT s = (i >= 0 ? i & 0xfff : -(-i & 0xfff));
> + operands[3] = GEN_INT (i - s);
> + operands[4] = GEN_INT (s);
> + operands[5] = gen_lowpart (SImode, operands[0]);
> + }
> )
>
> (define_expand "addti3"
> diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
> index 9b77291..0208b25 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -35,6 +35,11 @@
> (and (match_code "const_int")
> (match_test "aarch64_uimm12_shift (ival)")))
>
> +(define_constraint "Upl"
> + "A constraint that matches two uses of add instructions."
> + (and (match_code "const_int")
> + (match_test "aarch64_pluslong_strict_immedate (op, VOIDmode)")))
> +
> (define_constraint "J"
> "A constant that can be used with a SUB operation (once negated)."
> (and (match_code "const_int")
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index a2eb69c..f3b514b 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -107,6 +107,10 @@
> (and (match_code "const_int")
> (match_test "(INTVAL (op) < 0xffffff && INTVAL (op) > -0xffffff)")))
>
> +(define_predicate "aarch64_pluslong_strict_immedate"
> + (and (match_operand 0 "aarch64_pluslong_immediate")
> + (not (match_operand 0 "aarch64_plus_immediate"))))
> +
> (define_predicate "aarch64_pluslong_operand"
> (ior (match_operand 0 "register_operand")
> (match_operand 0 "aarch64_pluslong_immediate")))
>
More information about the Gcc-patches mailing list